diff options
Diffstat (limited to 'main/openssl/crypto/sha/asm/sha256-armv4.s')
-rw-r--r-- | main/openssl/crypto/sha/asm/sha256-armv4.s | 3729 |
1 files changed, 2451 insertions, 1278 deletions
diff --git a/main/openssl/crypto/sha/asm/sha256-armv4.s b/main/openssl/crypto/sha/asm/sha256-armv4.s index 9c20a63c..853d7da5 100644 --- a/main/openssl/crypto/sha/asm/sha256-armv4.s +++ b/main/openssl/crypto/sha/asm/sha256-armv4.s @@ -23,1463 +23,1721 @@ K256: .word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .size K256,.-K256 +.word 0 @ terminator +.LOPENSSL_armcap: +.word OPENSSL_armcap_P-sha256_block_data_order +.align 5 .global sha256_block_data_order .type sha256_block_data_order,%function sha256_block_data_order: sub r3,pc,#8 @ sha256_block_data_order add r2,r1,r2,lsl#6 @ len to point at the end of inp +#if __ARM_ARCH__>=7 + ldr r12,.LOPENSSL_armcap + ldr r12,[r3,r12] @ OPENSSL_armcap_P + tst r12,#ARMV8_SHA256 + bne .LARMv8 + tst r12,#ARMV7_NEON + bne .LNEON +#endif stmdb sp!,{r0,r1,r2,r4-r11,lr} ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} - sub r14,r3,#256 @ K256 + sub r14,r3,#256+32 @ K256 sub sp,sp,#16*4 @ alloca(X[16]) .Loop: +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ magic + eor r12,r12,r12 #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 0 +# if 0==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r8,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 0 + @ ldrb r2,[r1,#3] @ 0 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 0==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r8,ror#19 @ Sigma1(e) #endif - mov r0,r8,ror#6 ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r8,ror#11 + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#0*4] eor r2,r9,r10 -#if 0>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 0==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r8,ror#25 @ Sigma1(e) + add r11,r11,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r8 - str r3,[sp,#0*4] - add r3,r3,r0 + add r11,r11,r12 @ h+=K256[i] eor r2,r2,r10 @ Ch(e,f,g) - add r3,r3,r11 - mov r11,r4,ror#2 - add r3,r3,r2 - eor r11,r11,r4,ror#13 - add r3,r3,r12 - eor r11,r11,r4,ror#22 @ Sigma0(a) -#if 0>=15 - ldr r1,[sp,#2*4] @ from BODY_16_xx -#endif - orr r0,r4,r5 - and r2,r4,r5 - and r0,r0,r6 - add r11,r11,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r7,r7,r3 - add r11,r11,r0 + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 0==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 0<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#2*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#15*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 1 +# if 1==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r7,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 1 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r7,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r7,ror#11 + @ ldrb r2,[r1,#3] @ 1 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 1==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r7,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#1*4] eor r2,r8,r9 -#if 1>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 1==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r7,ror#25 @ Sigma1(e) + add r10,r10,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r7 - str r3,[sp,#1*4] - add r3,r3,r0 + add r10,r10,r3 @ h+=K256[i] eor r2,r2,r9 @ Ch(e,f,g) - add r3,r3,r10 - mov r10,r11,ror#2 - add r3,r3,r2 - eor r10,r10,r11,ror#13 - add r3,r3,r12 - eor r10,r10,r11,ror#22 @ Sigma0(a) -#if 1>=15 - ldr r1,[sp,#3*4] @ from BODY_16_xx -#endif - orr r0,r11,r4 - and r2,r11,r4 - and r0,r0,r5 - add r10,r10,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r6,r6,r3 - add r10,r10,r0 + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 1==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 1<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#3*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#0*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 2 +# if 2==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r6,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 2 + @ ldrb r2,[r1,#3] @ 2 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 2==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r6,ror#19 @ Sigma1(e) #endif - mov r0,r6,ror#6 ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r6,ror#11 + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#2*4] eor r2,r7,r8 -#if 2>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 2==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r6,ror#25 @ Sigma1(e) + add r9,r9,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r6 - str r3,[sp,#2*4] - add r3,r3,r0 + add r9,r9,r12 @ h+=K256[i] eor r2,r2,r8 @ Ch(e,f,g) - add r3,r3,r9 - mov r9,r10,ror#2 - add r3,r3,r2 - eor r9,r9,r10,ror#13 - add r3,r3,r12 - eor r9,r9,r10,ror#22 @ Sigma0(a) -#if 2>=15 - ldr r1,[sp,#4*4] @ from BODY_16_xx -#endif - orr r0,r10,r11 - and r2,r10,r11 - and r0,r0,r4 - add r9,r9,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r5,r5,r3 - add r9,r9,r0 + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 2==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 2<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#4*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#1*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 3 +# if 3==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r5,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 3 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r5,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r5,ror#11 + @ ldrb r2,[r1,#3] @ 3 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 3==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r5,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#3*4] eor r2,r6,r7 -#if 3>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 3==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r5,ror#25 @ Sigma1(e) + add r8,r8,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r5 - str r3,[sp,#3*4] - add r3,r3,r0 + add r8,r8,r3 @ h+=K256[i] eor r2,r2,r7 @ Ch(e,f,g) - add r3,r3,r8 - mov r8,r9,ror#2 - add r3,r3,r2 - eor r8,r8,r9,ror#13 - add r3,r3,r12 - eor r8,r8,r9,ror#22 @ Sigma0(a) -#if 3>=15 - ldr r1,[sp,#5*4] @ from BODY_16_xx -#endif - orr r0,r9,r10 - and r2,r9,r10 - and r0,r0,r11 - add r8,r8,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r4,r4,r3 - add r8,r8,r0 + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 3==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 3<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#5*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#2*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 4 +# if 4==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r4,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 4 + @ ldrb r2,[r1,#3] @ 4 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 4==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r4,ror#19 @ Sigma1(e) #endif - mov r0,r4,ror#6 ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r4,ror#11 + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#4*4] eor r2,r5,r6 -#if 4>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 4==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r4,ror#25 @ Sigma1(e) + add r7,r7,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r4 - str r3,[sp,#4*4] - add r3,r3,r0 + add r7,r7,r12 @ h+=K256[i] eor r2,r2,r6 @ Ch(e,f,g) - add r3,r3,r7 - mov r7,r8,ror#2 - add r3,r3,r2 - eor r7,r7,r8,ror#13 - add r3,r3,r12 - eor r7,r7,r8,ror#22 @ Sigma0(a) -#if 4>=15 - ldr r1,[sp,#6*4] @ from BODY_16_xx -#endif - orr r0,r8,r9 - and r2,r8,r9 - and r0,r0,r10 - add r7,r7,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r11,r11,r3 - add r7,r7,r0 + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 4==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 4<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#6*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#3*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 5 +# if 5==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r11,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 5 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r11,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r11,ror#11 + @ ldrb r2,[r1,#3] @ 5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 5==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r11,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#5*4] eor r2,r4,r5 -#if 5>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 5==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r11,ror#25 @ Sigma1(e) + add r6,r6,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r11 - str r3,[sp,#5*4] - add r3,r3,r0 + add r6,r6,r3 @ h+=K256[i] eor r2,r2,r5 @ Ch(e,f,g) - add r3,r3,r6 - mov r6,r7,ror#2 - add r3,r3,r2 - eor r6,r6,r7,ror#13 - add r3,r3,r12 - eor r6,r6,r7,ror#22 @ Sigma0(a) -#if 5>=15 - ldr r1,[sp,#7*4] @ from BODY_16_xx -#endif - orr r0,r7,r8 - and r2,r7,r8 - and r0,r0,r9 - add r6,r6,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r10,r10,r3 - add r6,r6,r0 + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 5==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 5<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#7*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#4*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 6 +# if 6==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r10,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 6 + @ ldrb r2,[r1,#3] @ 6 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 6==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r10,ror#19 @ Sigma1(e) #endif - mov r0,r10,ror#6 ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r10,ror#11 + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#6*4] eor r2,r11,r4 -#if 6>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 6==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r10,ror#25 @ Sigma1(e) + add r5,r5,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r10 - str r3,[sp,#6*4] - add r3,r3,r0 + add r5,r5,r12 @ h+=K256[i] eor r2,r2,r4 @ Ch(e,f,g) - add r3,r3,r5 - mov r5,r6,ror#2 - add r3,r3,r2 - eor r5,r5,r6,ror#13 - add r3,r3,r12 - eor r5,r5,r6,ror#22 @ Sigma0(a) -#if 6>=15 - ldr r1,[sp,#8*4] @ from BODY_16_xx -#endif - orr r0,r6,r7 - and r2,r6,r7 - and r0,r0,r8 - add r5,r5,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r9,r9,r3 - add r5,r5,r0 + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 6==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 6<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#8*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#5*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 7 +# if 7==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r9,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 7 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r9,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r9,ror#11 + @ ldrb r2,[r1,#3] @ 7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 7==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r9,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#7*4] eor r2,r10,r11 -#if 7>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 7==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r9,ror#25 @ Sigma1(e) + add r4,r4,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r9 - str r3,[sp,#7*4] - add r3,r3,r0 + add r4,r4,r3 @ h+=K256[i] eor r2,r2,r11 @ Ch(e,f,g) - add r3,r3,r4 - mov r4,r5,ror#2 - add r3,r3,r2 - eor r4,r4,r5,ror#13 - add r3,r3,r12 - eor r4,r4,r5,ror#22 @ Sigma0(a) -#if 7>=15 - ldr r1,[sp,#9*4] @ from BODY_16_xx -#endif - orr r0,r5,r6 - and r2,r5,r6 - and r0,r0,r7 - add r4,r4,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r8,r8,r3 - add r4,r4,r0 + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 7==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 7<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#9*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#6*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 8 +# if 8==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r8,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 8 + @ ldrb r2,[r1,#3] @ 8 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 8==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r8,ror#19 @ Sigma1(e) #endif - mov r0,r8,ror#6 ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r8,ror#11 + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#8*4] eor r2,r9,r10 -#if 8>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 8==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r8,ror#25 @ Sigma1(e) + add r11,r11,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r8 - str r3,[sp,#8*4] - add r3,r3,r0 + add r11,r11,r12 @ h+=K256[i] eor r2,r2,r10 @ Ch(e,f,g) - add r3,r3,r11 - mov r11,r4,ror#2 - add r3,r3,r2 - eor r11,r11,r4,ror#13 - add r3,r3,r12 - eor r11,r11,r4,ror#22 @ Sigma0(a) -#if 8>=15 - ldr r1,[sp,#10*4] @ from BODY_16_xx -#endif - orr r0,r4,r5 - and r2,r4,r5 - and r0,r0,r6 - add r11,r11,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r7,r7,r3 - add r11,r11,r0 + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 8==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 8<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#10*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#7*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 9 +# if 9==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r7,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 9 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r7,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r7,ror#11 + @ ldrb r2,[r1,#3] @ 9 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 9==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r7,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#9*4] eor r2,r8,r9 -#if 9>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 9==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r7,ror#25 @ Sigma1(e) + add r10,r10,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r7 - str r3,[sp,#9*4] - add r3,r3,r0 + add r10,r10,r3 @ h+=K256[i] eor r2,r2,r9 @ Ch(e,f,g) - add r3,r3,r10 - mov r10,r11,ror#2 - add r3,r3,r2 - eor r10,r10,r11,ror#13 - add r3,r3,r12 - eor r10,r10,r11,ror#22 @ Sigma0(a) -#if 9>=15 - ldr r1,[sp,#11*4] @ from BODY_16_xx -#endif - orr r0,r11,r4 - and r2,r11,r4 - and r0,r0,r5 - add r10,r10,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r6,r6,r3 - add r10,r10,r0 + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 9==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 9<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#11*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#8*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 10 +# if 10==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r6,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 10 + @ ldrb r2,[r1,#3] @ 10 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 10==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r6,ror#19 @ Sigma1(e) #endif - mov r0,r6,ror#6 ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r6,ror#11 + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#10*4] eor r2,r7,r8 -#if 10>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 10==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r6,ror#25 @ Sigma1(e) + add r9,r9,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r6 - str r3,[sp,#10*4] - add r3,r3,r0 + add r9,r9,r12 @ h+=K256[i] eor r2,r2,r8 @ Ch(e,f,g) - add r3,r3,r9 - mov r9,r10,ror#2 - add r3,r3,r2 - eor r9,r9,r10,ror#13 - add r3,r3,r12 - eor r9,r9,r10,ror#22 @ Sigma0(a) -#if 10>=15 - ldr r1,[sp,#12*4] @ from BODY_16_xx -#endif - orr r0,r10,r11 - and r2,r10,r11 - and r0,r0,r4 - add r9,r9,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r5,r5,r3 - add r9,r9,r0 + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 10==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 10<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#12*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#9*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 11 +# if 11==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r5,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 11 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r5,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r5,ror#11 + @ ldrb r2,[r1,#3] @ 11 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 11==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r5,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#11*4] eor r2,r6,r7 -#if 11>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 11==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r5,ror#25 @ Sigma1(e) + add r8,r8,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r5 - str r3,[sp,#11*4] - add r3,r3,r0 + add r8,r8,r3 @ h+=K256[i] eor r2,r2,r7 @ Ch(e,f,g) - add r3,r3,r8 - mov r8,r9,ror#2 - add r3,r3,r2 - eor r8,r8,r9,ror#13 - add r3,r3,r12 - eor r8,r8,r9,ror#22 @ Sigma0(a) -#if 11>=15 - ldr r1,[sp,#13*4] @ from BODY_16_xx -#endif - orr r0,r9,r10 - and r2,r9,r10 - and r0,r0,r11 - add r8,r8,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r4,r4,r3 - add r8,r8,r0 + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 11==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 11<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#13*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#10*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 12 +# if 12==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r4,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 12 + @ ldrb r2,[r1,#3] @ 12 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 12==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r4,ror#19 @ Sigma1(e) #endif - mov r0,r4,ror#6 ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r4,ror#11 + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#12*4] eor r2,r5,r6 -#if 12>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 12==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r4,ror#25 @ Sigma1(e) + add r7,r7,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r4 - str r3,[sp,#12*4] - add r3,r3,r0 + add r7,r7,r12 @ h+=K256[i] eor r2,r2,r6 @ Ch(e,f,g) - add r3,r3,r7 - mov r7,r8,ror#2 - add r3,r3,r2 - eor r7,r7,r8,ror#13 - add r3,r3,r12 - eor r7,r7,r8,ror#22 @ Sigma0(a) -#if 12>=15 - ldr r1,[sp,#14*4] @ from BODY_16_xx -#endif - orr r0,r8,r9 - and r2,r8,r9 - and r0,r0,r10 - add r7,r7,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r11,r11,r3 - add r7,r7,r0 + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 12==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 12<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#14*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#11*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 13 +# if 13==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r11,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 13 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r11,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r11,ror#11 + @ ldrb r2,[r1,#3] @ 13 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 13==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r11,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#13*4] eor r2,r4,r5 -#if 13>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 13==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r11,ror#25 @ Sigma1(e) + add r6,r6,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r11 - str r3,[sp,#13*4] - add r3,r3,r0 + add r6,r6,r3 @ h+=K256[i] eor r2,r2,r5 @ Ch(e,f,g) - add r3,r3,r6 - mov r6,r7,ror#2 - add r3,r3,r2 - eor r6,r6,r7,ror#13 - add r3,r3,r12 - eor r6,r6,r7,ror#22 @ Sigma0(a) -#if 13>=15 - ldr r1,[sp,#15*4] @ from BODY_16_xx -#endif - orr r0,r7,r8 - and r2,r7,r8 - and r0,r0,r9 - add r6,r6,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r10,r10,r3 - add r6,r6,r0 + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 13==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 13<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#15*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#12*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 14 +# if 14==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r10,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 14 + @ ldrb r2,[r1,#3] @ 14 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 14==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r10,ror#19 @ Sigma1(e) #endif - mov r0,r10,ror#6 ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r10,ror#11 + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#14*4] eor r2,r11,r4 -#if 14>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 14==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r10,ror#25 @ Sigma1(e) + add r5,r5,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r10 - str r3,[sp,#14*4] - add r3,r3,r0 + add r5,r5,r12 @ h+=K256[i] eor r2,r2,r4 @ Ch(e,f,g) - add r3,r3,r5 - mov r5,r6,ror#2 - add r3,r3,r2 - eor r5,r5,r6,ror#13 - add r3,r3,r12 - eor r5,r5,r6,ror#22 @ Sigma0(a) -#if 14>=15 - ldr r1,[sp,#0*4] @ from BODY_16_xx -#endif - orr r0,r6,r7 - and r2,r6,r7 - and r0,r0,r8 - add r5,r5,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r9,r9,r3 - add r5,r5,r0 + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 14==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 14<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#0*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#13*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) #if __ARM_ARCH__>=7 - ldr r3,[r1],#4 + @ ldr r2,[r1],#4 @ 15 +# if 15==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r9,ror#19 @ Sigma1(e) + rev r2,r2 #else - ldrb r3,[r1,#3] @ 15 - ldrb r12,[r1,#2] - ldrb r2,[r1,#1] - ldrb r0,[r1],#4 - orr r3,r3,r12,lsl#8 - orr r3,r3,r2,lsl#16 - orr r3,r3,r0,lsl#24 -#endif - mov r0,r9,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r9,ror#11 + @ ldrb r2,[r1,#3] @ 15 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 15==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r9,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#15*4] eor r2,r10,r11 -#if 15>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 15==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r9,ror#25 @ Sigma1(e) + add r4,r4,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r9 - str r3,[sp,#15*4] - add r3,r3,r0 + add r4,r4,r3 @ h+=K256[i] eor r2,r2,r11 @ Ch(e,f,g) - add r3,r3,r4 - mov r4,r5,ror#2 - add r3,r3,r2 - eor r4,r4,r5,ror#13 - add r3,r3,r12 - eor r4,r4,r5,ror#22 @ Sigma0(a) -#if 15>=15 - ldr r1,[sp,#1*4] @ from BODY_16_xx -#endif - orr r0,r5,r6 - and r2,r5,r6 - and r0,r0,r7 - add r4,r4,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r8,r8,r3 - add r4,r4,r0 + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 15==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 15<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#1*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#14*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) .Lrounds_16_xx: - @ ldr r1,[sp,#1*4] @ 16 - ldr r12,[sp,#14*4] - mov r0,r1,ror#7 - ldr r3,[sp,#0*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#9*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r8,ror#6 + @ ldr r2,[sp,#1*4] @ 16 + @ ldr r1,[sp,#14*4] + mov r0,r2,ror#7 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#0*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#9*4] + + add r12,r12,r0 + eor r0,r8,r8,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r8,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r8,ror#11 + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#0*4] eor r2,r9,r10 -#if 16>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 16==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r8,ror#25 @ Sigma1(e) + add r11,r11,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r8 - str r3,[sp,#0*4] - add r3,r3,r0 + add r11,r11,r12 @ h+=K256[i] eor r2,r2,r10 @ Ch(e,f,g) - add r3,r3,r11 - mov r11,r4,ror#2 - add r3,r3,r2 - eor r11,r11,r4,ror#13 - add r3,r3,r12 - eor r11,r11,r4,ror#22 @ Sigma0(a) -#if 16>=15 - ldr r1,[sp,#2*4] @ from BODY_16_xx -#endif - orr r0,r4,r5 - and r2,r4,r5 - and r0,r0,r6 - add r11,r11,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r7,r7,r3 - add r11,r11,r0 - @ ldr r1,[sp,#2*4] @ 17 - ldr r12,[sp,#15*4] - mov r0,r1,ror#7 - ldr r3,[sp,#1*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#10*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 16==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 16<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#2*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#15*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#2*4] @ 17 + @ ldr r1,[sp,#15*4] + mov r0,r2,ror#7 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#1*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#10*4] + add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r7,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r7,ror#11 + eor r0,r7,r7,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r7,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#1*4] eor r2,r8,r9 -#if 17>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 17==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r7,ror#25 @ Sigma1(e) + add r10,r10,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r7 - str r3,[sp,#1*4] - add r3,r3,r0 + add r10,r10,r3 @ h+=K256[i] eor r2,r2,r9 @ Ch(e,f,g) - add r3,r3,r10 - mov r10,r11,ror#2 - add r3,r3,r2 - eor r10,r10,r11,ror#13 - add r3,r3,r12 - eor r10,r10,r11,ror#22 @ Sigma0(a) -#if 17>=15 - ldr r1,[sp,#3*4] @ from BODY_16_xx -#endif - orr r0,r11,r4 - and r2,r11,r4 - and r0,r0,r5 - add r10,r10,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r6,r6,r3 - add r10,r10,r0 - @ ldr r1,[sp,#3*4] @ 18 - ldr r12,[sp,#0*4] - mov r0,r1,ror#7 - ldr r3,[sp,#2*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#11*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r6,ror#6 + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 17==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 17<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#3*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#0*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#3*4] @ 18 + @ ldr r1,[sp,#0*4] + mov r0,r2,ror#7 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#2*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#11*4] + + add r12,r12,r0 + eor r0,r6,r6,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r6,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r6,ror#11 + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#2*4] eor r2,r7,r8 -#if 18>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 18==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r6,ror#25 @ Sigma1(e) + add r9,r9,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r6 - str r3,[sp,#2*4] - add r3,r3,r0 + add r9,r9,r12 @ h+=K256[i] eor r2,r2,r8 @ Ch(e,f,g) - add r3,r3,r9 - mov r9,r10,ror#2 - add r3,r3,r2 - eor r9,r9,r10,ror#13 - add r3,r3,r12 - eor r9,r9,r10,ror#22 @ Sigma0(a) -#if 18>=15 - ldr r1,[sp,#4*4] @ from BODY_16_xx -#endif - orr r0,r10,r11 - and r2,r10,r11 - and r0,r0,r4 - add r9,r9,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r5,r5,r3 - add r9,r9,r0 - @ ldr r1,[sp,#4*4] @ 19 - ldr r12,[sp,#1*4] - mov r0,r1,ror#7 - ldr r3,[sp,#3*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#12*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 18==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 18<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#4*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#1*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#4*4] @ 19 + @ ldr r1,[sp,#1*4] + mov r0,r2,ror#7 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#3*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#12*4] + add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r5,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r5,ror#11 + eor r0,r5,r5,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r5,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#3*4] eor r2,r6,r7 -#if 19>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 19==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r5,ror#25 @ Sigma1(e) + add r8,r8,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r5 - str r3,[sp,#3*4] - add r3,r3,r0 + add r8,r8,r3 @ h+=K256[i] eor r2,r2,r7 @ Ch(e,f,g) - add r3,r3,r8 - mov r8,r9,ror#2 - add r3,r3,r2 - eor r8,r8,r9,ror#13 - add r3,r3,r12 - eor r8,r8,r9,ror#22 @ Sigma0(a) -#if 19>=15 - ldr r1,[sp,#5*4] @ from BODY_16_xx -#endif - orr r0,r9,r10 - and r2,r9,r10 - and r0,r0,r11 - add r8,r8,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r4,r4,r3 - add r8,r8,r0 - @ ldr r1,[sp,#5*4] @ 20 - ldr r12,[sp,#2*4] - mov r0,r1,ror#7 - ldr r3,[sp,#4*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#13*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r4,ror#6 + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 19==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 19<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#5*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#2*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#5*4] @ 20 + @ ldr r1,[sp,#2*4] + mov r0,r2,ror#7 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#4*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#13*4] + + add r12,r12,r0 + eor r0,r4,r4,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r4,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r4,ror#11 + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#4*4] eor r2,r5,r6 -#if 20>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 20==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r4,ror#25 @ Sigma1(e) + add r7,r7,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r4 - str r3,[sp,#4*4] - add r3,r3,r0 + add r7,r7,r12 @ h+=K256[i] eor r2,r2,r6 @ Ch(e,f,g) - add r3,r3,r7 - mov r7,r8,ror#2 - add r3,r3,r2 - eor r7,r7,r8,ror#13 - add r3,r3,r12 - eor r7,r7,r8,ror#22 @ Sigma0(a) -#if 20>=15 - ldr r1,[sp,#6*4] @ from BODY_16_xx -#endif - orr r0,r8,r9 - and r2,r8,r9 - and r0,r0,r10 - add r7,r7,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r11,r11,r3 - add r7,r7,r0 - @ ldr r1,[sp,#6*4] @ 21 - ldr r12,[sp,#3*4] - mov r0,r1,ror#7 - ldr r3,[sp,#5*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#14*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 20==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 20<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#6*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#3*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#6*4] @ 21 + @ ldr r1,[sp,#3*4] + mov r0,r2,ror#7 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#5*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#14*4] + add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r11,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r11,ror#11 + eor r0,r11,r11,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r11,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#5*4] eor r2,r4,r5 -#if 21>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 21==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r11,ror#25 @ Sigma1(e) + add r6,r6,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r11 - str r3,[sp,#5*4] - add r3,r3,r0 + add r6,r6,r3 @ h+=K256[i] eor r2,r2,r5 @ Ch(e,f,g) - add r3,r3,r6 - mov r6,r7,ror#2 - add r3,r3,r2 - eor r6,r6,r7,ror#13 - add r3,r3,r12 - eor r6,r6,r7,ror#22 @ Sigma0(a) -#if 21>=15 - ldr r1,[sp,#7*4] @ from BODY_16_xx -#endif - orr r0,r7,r8 - and r2,r7,r8 - and r0,r0,r9 - add r6,r6,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r10,r10,r3 - add r6,r6,r0 - @ ldr r1,[sp,#7*4] @ 22 - ldr r12,[sp,#4*4] - mov r0,r1,ror#7 - ldr r3,[sp,#6*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#15*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r10,ror#6 + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 21==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 21<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#7*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#4*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#7*4] @ 22 + @ ldr r1,[sp,#4*4] + mov r0,r2,ror#7 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#6*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#15*4] + + add r12,r12,r0 + eor r0,r10,r10,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r10,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r10,ror#11 + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#6*4] eor r2,r11,r4 -#if 22>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 22==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r10,ror#25 @ Sigma1(e) + add r5,r5,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r10 - str r3,[sp,#6*4] - add r3,r3,r0 + add r5,r5,r12 @ h+=K256[i] eor r2,r2,r4 @ Ch(e,f,g) - add r3,r3,r5 - mov r5,r6,ror#2 - add r3,r3,r2 - eor r5,r5,r6,ror#13 - add r3,r3,r12 - eor r5,r5,r6,ror#22 @ Sigma0(a) -#if 22>=15 - ldr r1,[sp,#8*4] @ from BODY_16_xx -#endif - orr r0,r6,r7 - and r2,r6,r7 - and r0,r0,r8 - add r5,r5,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r9,r9,r3 - add r5,r5,r0 - @ ldr r1,[sp,#8*4] @ 23 - ldr r12,[sp,#5*4] - mov r0,r1,ror#7 - ldr r3,[sp,#7*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#0*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 22==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 22<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#8*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#5*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#8*4] @ 23 + @ ldr r1,[sp,#5*4] + mov r0,r2,ror#7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#7*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#0*4] + add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r9,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r9,ror#11 + eor r0,r9,r9,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r9,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#7*4] eor r2,r10,r11 -#if 23>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 23==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r9,ror#25 @ Sigma1(e) + add r4,r4,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r9 - str r3,[sp,#7*4] - add r3,r3,r0 + add r4,r4,r3 @ h+=K256[i] eor r2,r2,r11 @ Ch(e,f,g) - add r3,r3,r4 - mov r4,r5,ror#2 - add r3,r3,r2 - eor r4,r4,r5,ror#13 - add r3,r3,r12 - eor r4,r4,r5,ror#22 @ Sigma0(a) -#if 23>=15 - ldr r1,[sp,#9*4] @ from BODY_16_xx -#endif - orr r0,r5,r6 - and r2,r5,r6 - and r0,r0,r7 - add r4,r4,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r8,r8,r3 - add r4,r4,r0 - @ ldr r1,[sp,#9*4] @ 24 - ldr r12,[sp,#6*4] - mov r0,r1,ror#7 - ldr r3,[sp,#8*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#1*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r8,ror#6 + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 23==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 23<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#9*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#6*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#9*4] @ 24 + @ ldr r1,[sp,#6*4] + mov r0,r2,ror#7 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#8*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#1*4] + + add r12,r12,r0 + eor r0,r8,r8,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r8,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r8,ror#11 + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#8*4] eor r2,r9,r10 -#if 24>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 24==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r8,ror#25 @ Sigma1(e) + add r11,r11,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r8 - str r3,[sp,#8*4] - add r3,r3,r0 + add r11,r11,r12 @ h+=K256[i] eor r2,r2,r10 @ Ch(e,f,g) - add r3,r3,r11 - mov r11,r4,ror#2 - add r3,r3,r2 - eor r11,r11,r4,ror#13 - add r3,r3,r12 - eor r11,r11,r4,ror#22 @ Sigma0(a) -#if 24>=15 - ldr r1,[sp,#10*4] @ from BODY_16_xx -#endif - orr r0,r4,r5 - and r2,r4,r5 - and r0,r0,r6 - add r11,r11,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r7,r7,r3 - add r11,r11,r0 - @ ldr r1,[sp,#10*4] @ 25 - ldr r12,[sp,#7*4] - mov r0,r1,ror#7 - ldr r3,[sp,#9*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#2*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 24==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 24<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#10*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#7*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#10*4] @ 25 + @ ldr r1,[sp,#7*4] + mov r0,r2,ror#7 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#9*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#2*4] + add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r7,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r7,ror#11 + eor r0,r7,r7,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r7,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#9*4] eor r2,r8,r9 -#if 25>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 25==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r7,ror#25 @ Sigma1(e) + add r10,r10,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r7 - str r3,[sp,#9*4] - add r3,r3,r0 + add r10,r10,r3 @ h+=K256[i] eor r2,r2,r9 @ Ch(e,f,g) - add r3,r3,r10 - mov r10,r11,ror#2 - add r3,r3,r2 - eor r10,r10,r11,ror#13 - add r3,r3,r12 - eor r10,r10,r11,ror#22 @ Sigma0(a) -#if 25>=15 - ldr r1,[sp,#11*4] @ from BODY_16_xx -#endif - orr r0,r11,r4 - and r2,r11,r4 - and r0,r0,r5 - add r10,r10,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r6,r6,r3 - add r10,r10,r0 - @ ldr r1,[sp,#11*4] @ 26 - ldr r12,[sp,#8*4] - mov r0,r1,ror#7 - ldr r3,[sp,#10*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#3*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r6,ror#6 + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 25==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 25<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#11*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#8*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#11*4] @ 26 + @ ldr r1,[sp,#8*4] + mov r0,r2,ror#7 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#10*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#3*4] + + add r12,r12,r0 + eor r0,r6,r6,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r6,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r6,ror#11 + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#10*4] eor r2,r7,r8 -#if 26>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 26==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r6,ror#25 @ Sigma1(e) + add r9,r9,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r6 - str r3,[sp,#10*4] - add r3,r3,r0 + add r9,r9,r12 @ h+=K256[i] eor r2,r2,r8 @ Ch(e,f,g) - add r3,r3,r9 - mov r9,r10,ror#2 - add r3,r3,r2 - eor r9,r9,r10,ror#13 - add r3,r3,r12 - eor r9,r9,r10,ror#22 @ Sigma0(a) -#if 26>=15 - ldr r1,[sp,#12*4] @ from BODY_16_xx -#endif - orr r0,r10,r11 - and r2,r10,r11 - and r0,r0,r4 - add r9,r9,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r5,r5,r3 - add r9,r9,r0 - @ ldr r1,[sp,#12*4] @ 27 - ldr r12,[sp,#9*4] - mov r0,r1,ror#7 - ldr r3,[sp,#11*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#4*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 26==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 26<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#12*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#9*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#12*4] @ 27 + @ ldr r1,[sp,#9*4] + mov r0,r2,ror#7 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#11*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#4*4] + add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r5,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r5,ror#11 + eor r0,r5,r5,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r5,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#11*4] eor r2,r6,r7 -#if 27>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 27==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r5,ror#25 @ Sigma1(e) + add r8,r8,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r5 - str r3,[sp,#11*4] - add r3,r3,r0 + add r8,r8,r3 @ h+=K256[i] eor r2,r2,r7 @ Ch(e,f,g) - add r3,r3,r8 - mov r8,r9,ror#2 - add r3,r3,r2 - eor r8,r8,r9,ror#13 - add r3,r3,r12 - eor r8,r8,r9,ror#22 @ Sigma0(a) -#if 27>=15 - ldr r1,[sp,#13*4] @ from BODY_16_xx -#endif - orr r0,r9,r10 - and r2,r9,r10 - and r0,r0,r11 - add r8,r8,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r4,r4,r3 - add r8,r8,r0 - @ ldr r1,[sp,#13*4] @ 28 - ldr r12,[sp,#10*4] - mov r0,r1,ror#7 - ldr r3,[sp,#12*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#5*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r4,ror#6 + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 27==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 27<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#13*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#10*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#13*4] @ 28 + @ ldr r1,[sp,#10*4] + mov r0,r2,ror#7 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#12*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#5*4] + + add r12,r12,r0 + eor r0,r4,r4,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r4,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r4,ror#11 + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#12*4] eor r2,r5,r6 -#if 28>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 28==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r4,ror#25 @ Sigma1(e) + add r7,r7,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r4 - str r3,[sp,#12*4] - add r3,r3,r0 + add r7,r7,r12 @ h+=K256[i] eor r2,r2,r6 @ Ch(e,f,g) - add r3,r3,r7 - mov r7,r8,ror#2 - add r3,r3,r2 - eor r7,r7,r8,ror#13 - add r3,r3,r12 - eor r7,r7,r8,ror#22 @ Sigma0(a) -#if 28>=15 - ldr r1,[sp,#14*4] @ from BODY_16_xx -#endif - orr r0,r8,r9 - and r2,r8,r9 - and r0,r0,r10 - add r7,r7,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r11,r11,r3 - add r7,r7,r0 - @ ldr r1,[sp,#14*4] @ 29 - ldr r12,[sp,#11*4] - mov r0,r1,ror#7 - ldr r3,[sp,#13*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#6*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 28==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 28<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#14*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#11*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#14*4] @ 29 + @ ldr r1,[sp,#11*4] + mov r0,r2,ror#7 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#13*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#6*4] + add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r11,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r11,ror#11 + eor r0,r11,r11,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r11,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#13*4] eor r2,r4,r5 -#if 29>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 29==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r11,ror#25 @ Sigma1(e) + add r6,r6,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r11 - str r3,[sp,#13*4] - add r3,r3,r0 + add r6,r6,r3 @ h+=K256[i] eor r2,r2,r5 @ Ch(e,f,g) - add r3,r3,r6 - mov r6,r7,ror#2 - add r3,r3,r2 - eor r6,r6,r7,ror#13 - add r3,r3,r12 - eor r6,r6,r7,ror#22 @ Sigma0(a) -#if 29>=15 - ldr r1,[sp,#15*4] @ from BODY_16_xx -#endif - orr r0,r7,r8 - and r2,r7,r8 - and r0,r0,r9 - add r6,r6,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r10,r10,r3 - add r6,r6,r0 - @ ldr r1,[sp,#15*4] @ 30 - ldr r12,[sp,#12*4] - mov r0,r1,ror#7 - ldr r3,[sp,#14*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#7*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 - add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r10,ror#6 + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 29==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 29<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#15*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#12*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#15*4] @ 30 + @ ldr r1,[sp,#12*4] + mov r0,r2,ror#7 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#14*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#7*4] + + add r12,r12,r0 + eor r0,r10,r10,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r10,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r10,ror#11 + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#14*4] eor r2,r11,r4 -#if 30>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 30==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r10,ror#25 @ Sigma1(e) + add r5,r5,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r10 - str r3,[sp,#14*4] - add r3,r3,r0 + add r5,r5,r12 @ h+=K256[i] eor r2,r2,r4 @ Ch(e,f,g) - add r3,r3,r5 - mov r5,r6,ror#2 - add r3,r3,r2 - eor r5,r5,r6,ror#13 - add r3,r3,r12 - eor r5,r5,r6,ror#22 @ Sigma0(a) -#if 30>=15 - ldr r1,[sp,#0*4] @ from BODY_16_xx -#endif - orr r0,r6,r7 - and r2,r6,r7 - and r0,r0,r8 - add r5,r5,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r9,r9,r3 - add r5,r5,r0 - @ ldr r1,[sp,#0*4] @ 31 - ldr r12,[sp,#13*4] - mov r0,r1,ror#7 - ldr r3,[sp,#15*4] - eor r0,r0,r1,ror#18 - ldr r2,[sp,#8*4] - eor r0,r0,r1,lsr#3 @ sigma0(X[i+1]) - mov r1,r12,ror#17 + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 30==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 30<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#0*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#13*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#0*4] @ 31 + @ ldr r1,[sp,#13*4] + mov r0,r2,ror#7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#15*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#8*4] + add r3,r3,r0 - eor r1,r1,r12,ror#19 - add r3,r3,r2 - eor r1,r1,r12,lsr#10 @ sigma1(X[i+14]) - @ add r3,r3,r1 - mov r0,r9,ror#6 - ldr r12,[r14],#4 @ *K256++ - eor r0,r0,r9,ror#11 + eor r0,r9,r9,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r9,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#15*4] eor r2,r10,r11 -#if 31>=16 - add r3,r3,r1 @ from BODY_16_xx -#elif __ARM_ARCH__>=7 && defined(__ARMEL__) - rev r3,r3 -#endif -#if 31==15 - str r1,[sp,#17*4] @ leave room for r1 -#endif - eor r0,r0,r9,ror#25 @ Sigma1(e) + add r4,r4,r0,ror#6 @ h+=Sigma1(e) and r2,r2,r9 - str r3,[sp,#15*4] - add r3,r3,r0 + add r4,r4,r3 @ h+=K256[i] eor r2,r2,r11 @ Ch(e,f,g) - add r3,r3,r4 - mov r4,r5,ror#2 - add r3,r3,r2 - eor r4,r4,r5,ror#13 - add r3,r3,r12 - eor r4,r4,r5,ror#22 @ Sigma0(a) -#if 31>=15 - ldr r1,[sp,#1*4] @ from BODY_16_xx -#endif - orr r0,r5,r6 - and r2,r5,r6 - and r0,r0,r7 - add r4,r4,r3 - orr r0,r0,r2 @ Maj(a,b,c) - add r8,r8,r3 - add r4,r4,r0 - and r12,r12,#0xff - cmp r12,#0xf2 + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 31==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 31<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#1*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#14*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) + ldreq r3,[sp,#16*4] @ pull ctx bne .Lrounds_16_xx - ldr r3,[sp,#16*4] @ pull ctx + add r4,r4,r12 @ h+=Maj(a,b,c) from the past ldr r0,[r3,#0] ldr r2,[r3,#4] ldr r12,[r3,#8] @@ -1512,6 +1770,921 @@ sha256_block_data_order: moveq pc,lr @ be binary compatible with V4, yet .word 0xe12fff1e @ interoperable with Thumb ISA:-) #endif -.size sha256_block_data_order,.-sha256_block_data_order -.asciz "SHA256 block transform for ARMv4, CRYPTOGAMS by <appro@openssl.org>" +.size sha256_block_data_order,.-sha256_block_data_order +#if __ARM_ARCH__>=7 +.fpu neon + +.type sha256_block_data_order_neon,%function +.align 4 +sha256_block_data_order_neon: +.LNEON: + stmdb sp!,{r4-r12,lr} + + mov r12,sp + sub sp,sp,#16*4+16 @ alloca + sub r14,r3,#256+32 @ K256 + bic sp,sp,#15 @ align for 128-bit stores + + vld1.8 {q0},[r1]! + vld1.8 {q1},[r1]! + vld1.8 {q2},[r1]! + vld1.8 {q3},[r1]! + vld1.32 {q8},[r14,:128]! + vld1.32 {q9},[r14,:128]! + vld1.32 {q10},[r14,:128]! + vld1.32 {q11},[r14,:128]! + vrev32.8 q0,q0 @ yes, even on + str r0,[sp,#64] + vrev32.8 q1,q1 @ big-endian + str r1,[sp,#68] + mov r1,sp + vrev32.8 q2,q2 + str r2,[sp,#72] + vrev32.8 q3,q3 + str r12,[sp,#76] @ save original sp + vadd.i32 q8,q8,q0 + vadd.i32 q9,q9,q1 + vst1.32 {q8},[r1,:128]! + vadd.i32 q10,q10,q2 + vst1.32 {q9},[r1,:128]! + vadd.i32 q11,q11,q3 + vst1.32 {q10},[r1,:128]! + vst1.32 {q11},[r1,:128]! + + ldmia r0,{r4-r11} + sub r1,r1,#64 + ldr r2,[sp,#0] + eor r12,r12,r12 + eor r3,r5,r6 + b .L_00_48 + +.align 4 +.L_00_48: + vext.8 q8,q0,q1,#4 + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + vext.8 q9,q2,q3,#4 + add r4,r4,r12 + and r2,r2,r8 + eor r12,r0,r8,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vadd.i32 q0,q0,q9 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + vshr.u32 q9,q8,#3 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#4] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + veor q9,q9,q10 + add r10,r10,r2 + vsli.32 q11,q8,#14 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + vshr.u32 d24,d7,#17 + add r11,r11,r3 + and r2,r2,r7 + veor q9,q9,q11 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + vsli.32 d24,d7,#15 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + vshr.u32 d25,d7,#10 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + vadd.i32 q0,q0,q9 + add r10,r10,r2 + ldr r2,[sp,#8] + veor d25,d25,d24 + and r12,r12,r3 + add r6,r6,r10 + vshr.u32 d24,d7,#19 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + vsli.32 d24,d7,#13 + add r9,r9,r2 + eor r2,r7,r8 + veor d25,d25,d24 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + vadd.i32 d0,d0,d25 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + vshr.u32 d24,d0,#17 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + vsli.32 d24,d0,#15 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + vshr.u32 d25,d0,#10 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + veor d25,d25,d24 + ldr r2,[sp,#12] + and r3,r3,r12 + vshr.u32 d24,d0,#19 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + vld1.32 {q8},[r14,:128]! + add r8,r8,r2 + vsli.32 d24,d0,#13 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + veor d25,d25,d24 + add r9,r9,r3 + and r2,r2,r5 + vadd.i32 d1,d1,d25 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + vadd.i32 q8,q8,q0 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#16] + and r12,r12,r3 + add r4,r4,r8 + vst1.32 {q8},[r1,:128]! + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vext.8 q8,q1,q2,#4 + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + vext.8 q9,q3,q0,#4 + add r8,r8,r12 + and r2,r2,r4 + eor r12,r0,r4,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vadd.i32 q1,q1,q9 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + vshr.u32 q9,q8,#3 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#20] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + veor q9,q9,q10 + add r6,r6,r2 + vsli.32 q11,q8,#14 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + vshr.u32 d24,d1,#17 + add r7,r7,r3 + and r2,r2,r11 + veor q9,q9,q11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + vsli.32 d24,d1,#15 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + vshr.u32 d25,d1,#10 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + vadd.i32 q1,q1,q9 + add r6,r6,r2 + ldr r2,[sp,#24] + veor d25,d25,d24 + and r12,r12,r3 + add r10,r10,r6 + vshr.u32 d24,d1,#19 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + vsli.32 d24,d1,#13 + add r5,r5,r2 + eor r2,r11,r4 + veor d25,d25,d24 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + vadd.i32 d2,d2,d25 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + vshr.u32 d24,d2,#17 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + vsli.32 d24,d2,#15 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + vshr.u32 d25,d2,#10 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + veor d25,d25,d24 + ldr r2,[sp,#28] + and r3,r3,r12 + vshr.u32 d24,d2,#19 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + vld1.32 {q8},[r14,:128]! + add r4,r4,r2 + vsli.32 d24,d2,#13 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + veor d25,d25,d24 + add r5,r5,r3 + and r2,r2,r9 + vadd.i32 d3,d3,d25 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + vadd.i32 q8,q8,q1 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#32] + and r12,r12,r3 + add r8,r8,r4 + vst1.32 {q8},[r1,:128]! + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vext.8 q8,q2,q3,#4 + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + vext.8 q9,q0,q1,#4 + add r4,r4,r12 + and r2,r2,r8 + eor r12,r0,r8,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vadd.i32 q2,q2,q9 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + vshr.u32 q9,q8,#3 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#36] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + veor q9,q9,q10 + add r10,r10,r2 + vsli.32 q11,q8,#14 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + vshr.u32 d24,d3,#17 + add r11,r11,r3 + and r2,r2,r7 + veor q9,q9,q11 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + vsli.32 d24,d3,#15 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + vshr.u32 d25,d3,#10 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + vadd.i32 q2,q2,q9 + add r10,r10,r2 + ldr r2,[sp,#40] + veor d25,d25,d24 + and r12,r12,r3 + add r6,r6,r10 + vshr.u32 d24,d3,#19 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + vsli.32 d24,d3,#13 + add r9,r9,r2 + eor r2,r7,r8 + veor d25,d25,d24 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + vadd.i32 d4,d4,d25 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + vshr.u32 d24,d4,#17 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + vsli.32 d24,d4,#15 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + vshr.u32 d25,d4,#10 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + veor d25,d25,d24 + ldr r2,[sp,#44] + and r3,r3,r12 + vshr.u32 d24,d4,#19 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + vld1.32 {q8},[r14,:128]! + add r8,r8,r2 + vsli.32 d24,d4,#13 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + veor d25,d25,d24 + add r9,r9,r3 + and r2,r2,r5 + vadd.i32 d5,d5,d25 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + vadd.i32 q8,q8,q2 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#48] + and r12,r12,r3 + add r4,r4,r8 + vst1.32 {q8},[r1,:128]! + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vext.8 q8,q3,q0,#4 + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + vext.8 q9,q1,q2,#4 + add r8,r8,r12 + and r2,r2,r4 + eor r12,r0,r4,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vadd.i32 q3,q3,q9 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + vshr.u32 q9,q8,#3 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#52] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + veor q9,q9,q10 + add r6,r6,r2 + vsli.32 q11,q8,#14 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + vshr.u32 d24,d5,#17 + add r7,r7,r3 + and r2,r2,r11 + veor q9,q9,q11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + vsli.32 d24,d5,#15 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + vshr.u32 d25,d5,#10 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + vadd.i32 q3,q3,q9 + add r6,r6,r2 + ldr r2,[sp,#56] + veor d25,d25,d24 + and r12,r12,r3 + add r10,r10,r6 + vshr.u32 d24,d5,#19 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + vsli.32 d24,d5,#13 + add r5,r5,r2 + eor r2,r11,r4 + veor d25,d25,d24 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + vadd.i32 d6,d6,d25 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + vshr.u32 d24,d6,#17 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + vsli.32 d24,d6,#15 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + vshr.u32 d25,d6,#10 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + veor d25,d25,d24 + ldr r2,[sp,#60] + and r3,r3,r12 + vshr.u32 d24,d6,#19 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + vld1.32 {q8},[r14,:128]! + add r4,r4,r2 + vsli.32 d24,d6,#13 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + veor d25,d25,d24 + add r5,r5,r3 + and r2,r2,r9 + vadd.i32 d7,d7,d25 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + vadd.i32 q8,q8,q3 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[r14] + and r12,r12,r3 + add r8,r8,r4 + vst1.32 {q8},[r1,:128]! + add r4,r4,r0,ror#2 + eor r12,r12,r6 + teq r2,#0 @ check for K256 terminator + ldr r2,[sp,#0] + sub r1,r1,#64 + bne .L_00_48 + + ldr r1,[sp,#68] + ldr r0,[sp,#72] + sub r14,r14,#256 @ rewind r14 + teq r1,r0 + subeq r1,r1,#64 @ avoid SEGV + vld1.8 {q0},[r1]! @ load next input block + vld1.8 {q1},[r1]! + vld1.8 {q2},[r1]! + vld1.8 {q3},[r1]! + strne r1,[sp,#68] + mov r1,sp + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + add r4,r4,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r8 + eor r12,r0,r8,ror#19 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vrev32.8 q0,q0 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vadd.i32 q8,q8,q0 + ldr r2,[sp,#4] + and r3,r3,r12 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + add r10,r10,r2 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + add r11,r11,r3 + and r2,r2,r7 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + add r10,r10,r2 + ldr r2,[sp,#8] + and r12,r12,r3 + add r6,r6,r10 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + add r9,r9,r2 + eor r2,r7,r8 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + ldr r2,[sp,#12] + and r3,r3,r12 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + add r8,r8,r2 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + add r9,r9,r3 + and r2,r2,r5 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#16] + and r12,r12,r3 + add r4,r4,r8 + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vst1.32 {q8},[r1,:128]! + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + add r8,r8,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r4 + eor r12,r0,r4,ror#19 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vrev32.8 q1,q1 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vadd.i32 q8,q8,q1 + ldr r2,[sp,#20] + and r3,r3,r12 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + add r6,r6,r2 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + add r7,r7,r3 + and r2,r2,r11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + add r6,r6,r2 + ldr r2,[sp,#24] + and r12,r12,r3 + add r10,r10,r6 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + add r5,r5,r2 + eor r2,r11,r4 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + ldr r2,[sp,#28] + and r3,r3,r12 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + add r4,r4,r2 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + add r5,r5,r3 + and r2,r2,r9 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#32] + and r12,r12,r3 + add r8,r8,r4 + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vst1.32 {q8},[r1,:128]! + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + add r4,r4,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r8 + eor r12,r0,r8,ror#19 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vrev32.8 q2,q2 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vadd.i32 q8,q8,q2 + ldr r2,[sp,#36] + and r3,r3,r12 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + add r10,r10,r2 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + add r11,r11,r3 + and r2,r2,r7 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + add r10,r10,r2 + ldr r2,[sp,#40] + and r12,r12,r3 + add r6,r6,r10 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + add r9,r9,r2 + eor r2,r7,r8 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + ldr r2,[sp,#44] + and r3,r3,r12 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + add r8,r8,r2 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + add r9,r9,r3 + and r2,r2,r5 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#48] + and r12,r12,r3 + add r4,r4,r8 + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vst1.32 {q8},[r1,:128]! + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + add r8,r8,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r4 + eor r12,r0,r4,ror#19 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vrev32.8 q3,q3 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vadd.i32 q8,q8,q3 + ldr r2,[sp,#52] + and r3,r3,r12 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + add r6,r6,r2 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + add r7,r7,r3 + and r2,r2,r11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + add r6,r6,r2 + ldr r2,[sp,#56] + and r12,r12,r3 + add r10,r10,r6 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + add r5,r5,r2 + eor r2,r11,r4 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + ldr r2,[sp,#60] + and r3,r3,r12 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + add r4,r4,r2 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + add r5,r5,r3 + and r2,r2,r9 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#64] + and r12,r12,r3 + add r8,r8,r4 + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vst1.32 {q8},[r1,:128]! + ldr r0,[r2,#0] + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldr r12,[r2,#4] + ldr r3,[r2,#8] + ldr r1,[r2,#12] + add r4,r4,r0 @ accumulate + ldr r0,[r2,#16] + add r5,r5,r12 + ldr r12,[r2,#20] + add r6,r6,r3 + ldr r3,[r2,#24] + add r7,r7,r1 + ldr r1,[r2,#28] + add r8,r8,r0 + str r4,[r2],#4 + add r9,r9,r12 + str r5,[r2],#4 + add r10,r10,r3 + str r6,[r2],#4 + add r11,r11,r1 + str r7,[r2],#4 + stmia r2,{r8-r11} + + movne r1,sp + ldrne r2,[sp,#0] + eorne r12,r12,r12 + ldreq sp,[sp,#76] @ restore original sp + eorne r3,r5,r6 + bne .L_00_48 + + ldmia sp!,{r4-r12,pc} +.size sha256_block_data_order_neon,.-sha256_block_data_order_neon +#endif +#if __ARM_ARCH__>=7 +.type sha256_block_data_order_armv8,%function +.align 5 +sha256_block_data_order_armv8: +.LARMv8: + vld1.32 {q0,q1},[r0] + sub r3,r3,#sha256_block_data_order-K256 + +.Loop_v8: + vld1.8 {q8-q9},[r1]! + vld1.8 {q10-q11},[r1]! + vld1.32 {q12},[r3]! + vrev32.8 q8,q8 + vrev32.8 q9,q9 + vrev32.8 q10,q10 + vrev32.8 q11,q11 + vmov q14,q0 @ offload + vmov q15,q1 + teq r1,r2 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + .byte 0xe2,0x03,0xfa,0xf3 @ sha256su0 q8,q9 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + .byte 0xe6,0x0c,0x64,0xf3 @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + .byte 0xe4,0x23,0xfa,0xf3 @ sha256su0 q9,q10 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + .byte 0xe0,0x2c,0x66,0xf3 @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + .byte 0xe6,0x43,0xfa,0xf3 @ sha256su0 q10,q11 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + .byte 0xe2,0x4c,0x60,0xf3 @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + .byte 0xe0,0x63,0xfa,0xf3 @ sha256su0 q11,q8 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + .byte 0xe4,0x6c,0x62,0xf3 @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + + vld1.32 {q13},[r3] + vadd.i32 q12,q12,q10 + sub r3,r3,#256-16 @ rewind + vmov q2,q0 + .byte 0x68,0x0c,0x02,0xf3 @ sha256h q0,q1,q12 + .byte 0x68,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q12 + + vadd.i32 q13,q13,q11 + vmov q2,q0 + .byte 0x6a,0x0c,0x02,0xf3 @ sha256h q0,q1,q13 + .byte 0x6a,0x2c,0x14,0xf3 @ sha256h2 q1,q2,q13 + + vadd.i32 q0,q0,q14 + vadd.i32 q1,q1,q15 + bne .Loop_v8 + + vst1.32 {q0,q1},[r0] + + bx lr @ bx lr +.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8 +#endif +.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>" .align 2 +.comm OPENSSL_armcap_P,4,4 |