summaryrefslogtreecommitdiff
path: root/main/openssl/crypto/aes/asm/aesv8-armx-64.S
diff options
context:
space:
mode:
Diffstat (limited to 'main/openssl/crypto/aes/asm/aesv8-armx-64.S')
m---------main/openssl0
-rw-r--r--main/openssl/crypto/aes/asm/aesv8-armx-64.S761
2 files changed, 0 insertions, 761 deletions
diff --git a/main/openssl b/main/openssl
new file mode 160000
+Subproject 4d377a9ce111930d8a8f06dc0e94a892a7f6c51
diff --git a/main/openssl/crypto/aes/asm/aesv8-armx-64.S b/main/openssl/crypto/aes/asm/aesv8-armx-64.S
deleted file mode 100644
index be0a13df..00000000
--- a/main/openssl/crypto/aes/asm/aesv8-armx-64.S
+++ /dev/null
@@ -1,761 +0,0 @@
-#include "arm_arch.h"
-
-#if __ARM_ARCH__>=7
-.text
-.arch armv8-a+crypto
-.align 5
-rcon:
-.long 0x01,0x01,0x01,0x01
-.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
-.long 0x1b,0x1b,0x1b,0x1b
-
-.globl aes_v8_set_encrypt_key
-.type aes_v8_set_encrypt_key,%function
-.align 5
-aes_v8_set_encrypt_key:
-.Lenc_key:
- stp x29,x30,[sp,#-16]!
- add x29,sp,#0
- adr x3,rcon
- cmp w1,#192
-
- eor v0.16b,v0.16b,v0.16b
- ld1 {v3.16b},[x0],#16
- mov w1,#8 // reuse w1
- ld1 {v1.4s,v2.4s},[x3],#32
-
- b.lt .Loop128
- b.eq .L192
- b .L256
-
-.align 4
-.Loop128:
- tbl v6.16b,{v3.16b},v2.16b
- ext v5.16b,v0.16b,v3.16b,#12
- st1 {v3.4s},[x2],#16
- aese v6.16b,v0.16b
- subs w1,w1,#1
-
- eor v3.16b,v3.16b,v5.16b
- ext v5.16b,v0.16b,v5.16b,#12
- eor v3.16b,v3.16b,v5.16b
- ext v5.16b,v0.16b,v5.16b,#12
- eor v6.16b,v6.16b,v1.16b
- eor v3.16b,v3.16b,v5.16b
- shl v1.16b,v1.16b,#1
- eor v3.16b,v3.16b,v6.16b
- b.ne .Loop128
-
- ld1 {v1.4s},[x3]
-
- tbl v6.16b,{v3.16b},v2.16b
- ext v5.16b,v0.16b,v3.16b,#12
- st1 {v3.4s},[x2],#16
- aese v6.16b,v0.16b
-
- eor v3.16b,v3.16b,v5.16b
- ext v5.16b,v0.16b,v5.16b,#12
- eor v3.16b,v3.16b,v5.16b
- ext v5.16b,v0.16b,v5.16b,#12
- eor v6.16b,v6.16b,v1.16b
- eor v3.16b,v3.16b,v5.16b
- shl v1.16b,v1.16b,#1
- eor v3.16b,v3.16b,v6.16b
-
- tbl v6.16b,{v3.16b},v2.16b
- ext v5.16b,v0.16b,v3.16b,#12
- st1 {v3.4s},[x2],#16
- aese v6.16b,v0.16b
-
- eor v3.16b,v3.16b,v5.16b
- ext v5.16b,v0.16b,v5.16b,#12
- eor v3.16b,v3.16b,v5.16b
- ext v5.16b,v0.16b,v5.16b,#12
- eor v6.16b,v6.16b,v1.16b
- eor v3.16b,v3.16b,v5.16b
- eor v3.16b,v3.16b,v6.16b
- st1 {v3.4s},[x2]
- add x2,x2,#0x50
-
- mov w12,#10
- b .Ldone
-
-.align 4
-.L192:
- ld1 {v4.8b},[x0],#8
- movi v6.16b,#8 // borrow v6.16b
- st1 {v3.4s},[x2],#16
- sub v2.16b,v2.16b,v6.16b // adjust the mask
-
-.Loop192:
- tbl v6.16b,{v4.16b},v2.16b
- ext v5.16b,v0.16b,v3.16b,#12
- st1 {v4.8b},[x2],#8
- aese v6.16b,v0.16b
- subs w1,w1,#1
-
- eor v3.16b,v3.16b,v5.16b
- ext v5.16b,v0.16b,v5.16b,#12
- eor v3.16b,v3.16b,v5.16b
- ext v5.16b,v0.16b,v5.16b,#12
- eor v3.16b,v3.16b,v5.16b
-
- dup v5.4s,v3.s[3]
- eor v5.16b,v5.16b,v4.16b
- eor v6.16b,v6.16b,v1.16b
- ext v4.16b,v0.16b,v4.16b,#12
- shl v1.16b,v1.16b,#1
- eor v4.16b,v4.16b,v5.16b
- eor v3.16b,v3.16b,v6.16b
- eor v4.16b,v4.16b,v6.16b
- st1 {v3.4s},[x2],#16
- b.ne .Loop192
-
- mov w12,#12
- add x2,x2,#0x20
- b .Ldone
-
-.align 4
-.L256:
- ld1 {v4.16b},[x0]
- mov w1,#7
- mov w12,#14
- st1 {v3.4s},[x2],#16
-
-.Loop256:
- tbl v6.16b,{v4.16b},v2.16b
- ext v5.16b,v0.16b,v3.16b,#12
- st1 {v4.4s},[x2],#16
- aese v6.16b,v0.16b
- subs w1,w1,#1
-
- eor v3.16b,v3.16b,v5.16b
- ext v5.16b,v0.16b,v5.16b,#12
- eor v3.16b,v3.16b,v5.16b
- ext v5.16b,v0.16b,v5.16b,#12
- eor v6.16b,v6.16b,v1.16b
- eor v3.16b,v3.16b,v5.16b
- shl v1.16b,v1.16b,#1
- eor v3.16b,v3.16b,v6.16b
- st1 {v3.4s},[x2],#16
- b.eq .Ldone
-
- dup v6.4s,v3.s[3] // just splat
- ext v5.16b,v0.16b,v4.16b,#12
- aese v6.16b,v0.16b
-
- eor v4.16b,v4.16b,v5.16b
- ext v5.16b,v0.16b,v5.16b,#12
- eor v4.16b,v4.16b,v5.16b
- ext v5.16b,v0.16b,v5.16b,#12
- eor v4.16b,v4.16b,v5.16b
-
- eor v4.16b,v4.16b,v6.16b
- b .Loop256
-
-.Ldone:
- str w12,[x2]
-
- eor x0,x0,x0 // return value
- ldr x29,[sp],#16
- ret
-.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
-
-.globl aes_v8_set_decrypt_key
-.type aes_v8_set_decrypt_key,%function
-.align 5
-aes_v8_set_decrypt_key:
- stp x29,x30,[sp,#-16]!
- add x29,sp,#0
- bl .Lenc_key
-
- sub x2,x2,#240 // restore original x2
- mov x4,#-16
- add x0,x2,x12,lsl#4 // end of key schedule
-
- ld1 {v0.4s},[x2]
- ld1 {v1.4s},[x0]
- st1 {v0.4s},[x0],x4
- st1 {v1.4s},[x2],#16
-
-.Loop_imc:
- ld1 {v0.4s},[x2]
- ld1 {v1.4s},[x0]
- aesimc v0.16b,v0.16b
- aesimc v1.16b,v1.16b
- st1 {v0.4s},[x0],x4
- st1 {v1.4s},[x2],#16
- cmp x0,x2
- b.hi .Loop_imc
-
- ld1 {v0.4s},[x2]
- aesimc v0.16b,v0.16b
- st1 {v0.4s},[x0]
-
- eor x0,x0,x0 // return value
- ldp x29,x30,[sp],#16
- ret
-.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
-.globl aes_v8_encrypt
-.type aes_v8_encrypt,%function
-.align 5
-aes_v8_encrypt:
- ldr w3,[x2,#240]
- ld1 {v0.4s},[x2],#16
- ld1 {v2.16b},[x0]
- sub w3,w3,#2
- ld1 {v1.4s},[x2],#16
-
-.Loop_enc:
- aese v2.16b,v0.16b
- ld1 {v0.4s},[x2],#16
- aesmc v2.16b,v2.16b
- subs w3,w3,#2
- aese v2.16b,v1.16b
- ld1 {v1.4s},[x2],#16
- aesmc v2.16b,v2.16b
- b.gt .Loop_enc
-
- aese v2.16b,v0.16b
- ld1 {v0.4s},[x2]
- aesmc v2.16b,v2.16b
- aese v2.16b,v1.16b
- eor v2.16b,v2.16b,v0.16b
-
- st1 {v2.16b},[x1]
- ret
-.size aes_v8_encrypt,.-aes_v8_encrypt
-.globl aes_v8_decrypt
-.type aes_v8_decrypt,%function
-.align 5
-aes_v8_decrypt:
- ldr w3,[x2,#240]
- ld1 {v0.4s},[x2],#16
- ld1 {v2.16b},[x0]
- sub w3,w3,#2
- ld1 {v1.4s},[x2],#16
-
-.Loop_dec:
- aesd v2.16b,v0.16b
- ld1 {v0.4s},[x2],#16
- aesimc v2.16b,v2.16b
- subs w3,w3,#2
- aesd v2.16b,v1.16b
- ld1 {v1.4s},[x2],#16
- aesimc v2.16b,v2.16b
- b.gt .Loop_dec
-
- aesd v2.16b,v0.16b
- ld1 {v0.4s},[x2]
- aesimc v2.16b,v2.16b
- aesd v2.16b,v1.16b
- eor v2.16b,v2.16b,v0.16b
-
- st1 {v2.16b},[x1]
- ret
-.size aes_v8_decrypt,.-aes_v8_decrypt
-.globl aes_v8_cbc_encrypt
-.type aes_v8_cbc_encrypt,%function
-.align 5
-aes_v8_cbc_encrypt:
- stp x29,x30,[sp,#-16]!
- add x29,sp,#0
- subs x2,x2,#16
- mov x8,#16
- b.lo .Lcbc_abort
- csel x8,xzr,x8,eq
-
- cmp w5,#0 // en- or decrypting?
- ldr w5,[x3,#240]
- and x2,x2,#-16
- ld1 {v6.16b},[x4]
- ld1 {v0.16b},[x0],x8
-
- ld1 {v16.4s-v17.4s},[x3] // load key schedule...
- sub w5,w5,#6
- add x7,x3,x5,lsl#4 // pointer to last 7 round keys
- sub w5,w5,#2
- ld1 {v18.4s-v19.4s},[x7],#32
- ld1 {v20.4s-v21.4s},[x7],#32
- ld1 {v22.4s-v23.4s},[x7],#32
- ld1 {v7.4s},[x7]
-
- add x7,x3,#32
- mov w6,w5
- b.eq .Lcbc_dec
-
- cmp w5,#2
- eor v0.16b,v0.16b,v6.16b
- eor v5.16b,v16.16b,v7.16b
- b.eq .Lcbc_enc128
-
-.Loop_cbc_enc:
- aese v0.16b,v16.16b
- ld1 {v16.4s},[x7],#16
- aesmc v0.16b,v0.16b
- subs w6,w6,#2
- aese v0.16b,v17.16b
- ld1 {v17.4s},[x7],#16
- aesmc v0.16b,v0.16b
- b.gt .Loop_cbc_enc
-
- aese v0.16b,v16.16b
- aesmc v0.16b,v0.16b
- subs x2,x2,#16
- aese v0.16b,v17.16b
- aesmc v0.16b,v0.16b
- csel x8,xzr,x8,eq
- aese v0.16b,v18.16b
- aesmc v0.16b,v0.16b
- add x7,x3,#16
- aese v0.16b,v19.16b
- aesmc v0.16b,v0.16b
- ld1 {v16.16b},[x0],x8
- aese v0.16b,v20.16b
- aesmc v0.16b,v0.16b
- eor v16.16b,v16.16b,v5.16b
- aese v0.16b,v21.16b
- aesmc v0.16b,v0.16b
- ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
- aese v0.16b,v22.16b
- aesmc v0.16b,v0.16b
- aese v0.16b,v23.16b
-
- mov w6,w5
- eor v6.16b,v0.16b,v7.16b
- st1 {v6.16b},[x1],#16
- b.hs .Loop_cbc_enc
-
- b .Lcbc_done
-
-.align 5
-.Lcbc_enc128:
- ld1 {v2.4s-v3.4s},[x7]
- aese v0.16b,v16.16b
- aesmc v0.16b,v0.16b
- b .Lenter_cbc_enc128
-.Loop_cbc_enc128:
- aese v0.16b,v16.16b
- aesmc v0.16b,v0.16b
- st1 {v6.16b},[x1],#16
-.Lenter_cbc_enc128:
- aese v0.16b,v17.16b
- aesmc v0.16b,v0.16b
- subs x2,x2,#16
- aese v0.16b,v2.16b
- aesmc v0.16b,v0.16b
- csel x8,xzr,x8,eq
- aese v0.16b,v3.16b
- aesmc v0.16b,v0.16b
- aese v0.16b,v18.16b
- aesmc v0.16b,v0.16b
- aese v0.16b,v19.16b
- aesmc v0.16b,v0.16b
- ld1 {v16.16b},[x0],x8
- aese v0.16b,v20.16b
- aesmc v0.16b,v0.16b
- aese v0.16b,v21.16b
- aesmc v0.16b,v0.16b
- aese v0.16b,v22.16b
- aesmc v0.16b,v0.16b
- eor v16.16b,v16.16b,v5.16b
- aese v0.16b,v23.16b
- eor v6.16b,v0.16b,v7.16b
- b.hs .Loop_cbc_enc128
-
- st1 {v6.16b},[x1],#16
- b .Lcbc_done
-
-.align 5
-.Lcbc_dec128:
- ld1 {v4.4s-v5.4s},[x7]
- eor v6.16b,v6.16b,v7.16b
- eor v2.16b,v0.16b,v7.16b
- mov x12,x8
-
-.Loop2x_cbc_dec128:
- aesd v0.16b,v16.16b
- aesd v1.16b,v16.16b
- aesimc v0.16b,v0.16b
- aesimc v1.16b,v1.16b
- subs x2,x2,#32
- aesd v0.16b,v17.16b
- aesd v1.16b,v17.16b
- aesimc v0.16b,v0.16b
- aesimc v1.16b,v1.16b
- csel x8,xzr,x8,lo
- aesd v0.16b,v4.16b
- aesd v1.16b,v4.16b
- aesimc v0.16b,v0.16b
- aesimc v1.16b,v1.16b
- csel x12,xzr,x12,ls
- aesd v0.16b,v5.16b
- aesd v1.16b,v5.16b
- aesimc v0.16b,v0.16b
- aesimc v1.16b,v1.16b
- aesd v0.16b,v18.16b
- aesd v1.16b,v18.16b
- aesimc v0.16b,v0.16b
- aesimc v1.16b,v1.16b
- aesd v0.16b,v19.16b
- aesd v1.16b,v19.16b
- aesimc v0.16b,v0.16b
- aesimc v1.16b,v1.16b
- aesd v0.16b,v20.16b
- aesd v1.16b,v20.16b
- aesimc v0.16b,v0.16b
- aesimc v1.16b,v1.16b
- aesd v0.16b,v21.16b
- aesd v1.16b,v21.16b
- aesimc v0.16b,v0.16b
- aesimc v1.16b,v1.16b
- aesd v0.16b,v22.16b
- aesd v1.16b,v22.16b
- aesimc v0.16b,v0.16b
- aesimc v1.16b,v1.16b
- aesd v0.16b,v23.16b
- aesd v1.16b,v23.16b
-
- eor v6.16b,v6.16b,v0.16b
- ld1 {v0.16b},[x0],x8
- eor v2.16b,v2.16b,v1.16b
- ld1 {v1.16b},[x0],x12
- st1 {v6.16b},[x1],#16
- eor v6.16b,v3.16b,v7.16b
- st1 {v2.16b},[x1],#16
- eor v2.16b,v0.16b,v7.16b
- orr v3.16b,v1.16b,v1.16b
- b.hs .Loop2x_cbc_dec128
-
- adds x2,x2,#32
- eor v6.16b,v6.16b,v7.16b
- b.eq .Lcbc_done
- eor v2.16b,v2.16b,v7.16b
- b .Lcbc_dec_tail
-
-.align 5
-.Lcbc_dec:
- subs x2,x2,#16
- orr v2.16b,v0.16b,v0.16b
- b.lo .Lcbc_dec_tail
-
- csel x8,xzr,x8,eq
- cmp w5,#2
- ld1 {v1.16b},[x0],x8
- orr v3.16b,v1.16b,v1.16b
- b.eq .Lcbc_dec128
-
-.Loop2x_cbc_dec:
- aesd v0.16b,v16.16b
- aesd v1.16b,v16.16b
- ld1 {v16.4s},[x7],#16
- aesimc v0.16b,v0.16b
- aesimc v1.16b,v1.16b
- subs w6,w6,#2
- aesd v0.16b,v17.16b
- aesd v1.16b,v17.16b
- ld1 {v17.4s},[x7],#16
- aesimc v0.16b,v0.16b
- aesimc v1.16b,v1.16b
- b.gt .Loop2x_cbc_dec
-
- aesd v0.16b,v16.16b
- aesd v1.16b,v16.16b
- aesimc v0.16b,v0.16b
- aesimc v1.16b,v1.16b
- eor v4.16b,v6.16b,v7.16b
- eor v5.16b,v2.16b,v7.16b
- aesd v0.16b,v17.16b
- aesd v1.16b,v17.16b
- aesimc v0.16b,v0.16b
- aesimc v1.16b,v1.16b
- orr v6.16b,v3.16b,v3.16b
- subs x2,x2,#32
- aesd v0.16b,v18.16b
- aesd v1.16b,v18.16b
- aesimc v0.16b,v0.16b
- csel x8,xzr,x8,lo
- aesimc v1.16b,v1.16b
- mov x7,x3
- aesd v0.16b,v19.16b
- aesd v1.16b,v19.16b
- aesimc v0.16b,v0.16b
- ld1 {v2.16b},[x0],x8
- aesimc v1.16b,v1.16b
- csel x8,xzr,x8,ls
- aesd v0.16b,v20.16b
- aesd v1.16b,v20.16b
- aesimc v0.16b,v0.16b
- aesimc v1.16b,v1.16b
- ld1 {v3.16b},[x0],x8
- aesd v0.16b,v21.16b
- aesd v1.16b,v21.16b
- aesimc v0.16b,v0.16b
- aesimc v1.16b,v1.16b
- ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
- aesd v0.16b,v22.16b
- aesd v1.16b,v22.16b
- aesimc v0.16b,v0.16b
- aesimc v1.16b,v1.16b
- ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
- aesd v0.16b,v23.16b
- aesd v1.16b,v23.16b
-
- mov w6,w5
- eor v4.16b,v4.16b,v0.16b
- eor v5.16b,v5.16b,v1.16b
- orr v0.16b,v2.16b,v2.16b
- st1 {v4.16b},[x1],#16
- orr v1.16b,v3.16b,v3.16b
- st1 {v5.16b},[x1],#16
- b.hs .Loop2x_cbc_dec
-
- adds x2,x2,#32
- b.eq .Lcbc_done
-
-.Lcbc_dec_tail:
- aesd v0.16b,v16.16b
- ld1 {v16.4s},[x7],#16
- aesimc v0.16b,v0.16b
- subs w6,w6,#2
- aesd v0.16b,v17.16b
- ld1 {v17.4s},[x7],#16
- aesimc v0.16b,v0.16b
- b.gt .Lcbc_dec_tail
-
- aesd v0.16b,v16.16b
- aesimc v0.16b,v0.16b
- aesd v0.16b,v17.16b
- aesimc v0.16b,v0.16b
- eor v4.16b,v6.16b,v7.16b
- aesd v0.16b,v18.16b
- aesimc v0.16b,v0.16b
- orr v6.16b,v2.16b,v2.16b
- aesd v0.16b,v19.16b
- aesimc v0.16b,v0.16b
- aesd v0.16b,v20.16b
- aesimc v0.16b,v0.16b
- aesd v0.16b,v21.16b
- aesimc v0.16b,v0.16b
- aesd v0.16b,v22.16b
- aesimc v0.16b,v0.16b
- aesd v0.16b,v23.16b
-
- eor v4.16b,v4.16b,v0.16b
- st1 {v4.16b},[x1],#16
-
-.Lcbc_done:
- st1 {v6.16b},[x4]
-.Lcbc_abort:
- ldr x29,[sp],#16
- ret
-.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
-.globl aes_v8_ctr32_encrypt_blocks
-.type aes_v8_ctr32_encrypt_blocks,%function
-.align 5
-aes_v8_ctr32_encrypt_blocks:
- stp x29,x30,[sp,#-16]!
- add x29,sp,#0
- ldr w5,[x3,#240]
-
- ldr w8, [x4, #12]
- ld1 {v0.4s},[x4]
-
- ld1 {v16.4s-v17.4s},[x3] // load key schedule...
- sub w5,w5,#6
- add x7,x3,x5,lsl#4 // pointer to last 7 round keys
- sub w5,w5,#2
- ld1 {v18.4s-v19.4s},[x7],#32
- ld1 {v20.4s-v21.4s},[x7],#32
- ld1 {v22.4s-v23.4s},[x7],#32
- ld1 {v7.4s},[x7]
-
- add x7,x3,#32
- mov w6,w5
-
- subs x2,x2,#2
- b.lo .Lctr32_tail
-
-#ifndef __ARMEB__
- rev w8, w8
-#endif
- orr v1.16b,v0.16b,v0.16b
- add w8, w8, #1
- orr v6.16b,v0.16b,v0.16b
- rev w10, w8
- cmp w5,#2
- mov v1.s[3],w10
- b.eq .Lctr32_128
-
-.Loop2x_ctr32:
- aese v0.16b,v16.16b
- aese v1.16b,v16.16b
- ld1 {v16.4s},[x7],#16
- aesmc v0.16b,v0.16b
- aesmc v1.16b,v1.16b
- subs w6,w6,#2
- aese v0.16b,v17.16b
- aese v1.16b,v17.16b
- ld1 {v17.4s},[x7],#16
- aesmc v0.16b,v0.16b
- aesmc v1.16b,v1.16b
- b.gt .Loop2x_ctr32
-
- aese v0.16b,v16.16b
- aese v1.16b,v16.16b
- aesmc v4.16b,v0.16b
- orr v0.16b,v6.16b,v6.16b
- aesmc v5.16b,v1.16b
- orr v1.16b,v6.16b,v6.16b
- aese v4.16b,v17.16b
- aese v5.16b,v17.16b
- ld1 {v2.16b},[x0],#16
- aesmc v4.16b,v4.16b
- ld1 {v3.16b},[x0],#16
- aesmc v5.16b,v5.16b
- add w8,w8,#1
- aese v4.16b,v18.16b
- aese v5.16b,v18.16b
- rev w9,w8
- aesmc v4.16b,v4.16b
- aesmc v5.16b,v5.16b
- add w8,w8,#1
- aese v4.16b,v19.16b
- aese v5.16b,v19.16b
- eor v2.16b,v2.16b,v7.16b
- rev w10,w8
- aesmc v4.16b,v4.16b
- aesmc v5.16b,v5.16b
- eor v3.16b,v3.16b,v7.16b
- mov x7,x3
- aese v4.16b,v20.16b
- aese v5.16b,v20.16b
- subs x2,x2,#2
- aesmc v4.16b,v4.16b
- aesmc v5.16b,v5.16b
- ld1 {v16.4s-v17.4s},[x7],#32 // re-pre-load rndkey[0-1]
- aese v4.16b,v21.16b
- aese v5.16b,v21.16b
- aesmc v4.16b,v4.16b
- aesmc v5.16b,v5.16b
- aese v4.16b,v22.16b
- aese v5.16b,v22.16b
- mov v0.s[3], w9
- aesmc v4.16b,v4.16b
- mov v1.s[3], w10
- aesmc v5.16b,v5.16b
- aese v4.16b,v23.16b
- aese v5.16b,v23.16b
-
- mov w6,w5
- eor v2.16b,v2.16b,v4.16b
- eor v3.16b,v3.16b,v5.16b
- st1 {v2.16b},[x1],#16
- st1 {v3.16b},[x1],#16
- b.hs .Loop2x_ctr32
-
- adds x2,x2,#2
- b.eq .Lctr32_done
- b .Lctr32_tail
-
-.Lctr32_128:
- ld1 {v4.4s-v5.4s},[x7]
-
-.Loop2x_ctr32_128:
- aese v0.16b,v16.16b
- aese v1.16b,v16.16b
- aesmc v0.16b,v0.16b
- ld1 {v2.16b},[x0],#16
- aesmc v1.16b,v1.16b
- ld1 {v3.16b},[x0],#16
- aese v0.16b,v17.16b
- aese v1.16b,v17.16b
- add w8,w8,#1
- aesmc v0.16b,v0.16b
- aesmc v1.16b,v1.16b
- rev w9,w8
- aese v0.16b,v4.16b
- aese v1.16b,v4.16b
- add w8,w8,#1
- aesmc v0.16b,v0.16b
- aesmc v1.16b,v1.16b
- rev w10,w8
- aese v0.16b,v5.16b
- aese v1.16b,v5.16b
- subs x2,x2,#2
- aesmc v0.16b,v0.16b
- aesmc v1.16b,v1.16b
- aese v0.16b,v18.16b
- aese v1.16b,v18.16b
- aesmc v0.16b,v0.16b
- aesmc v1.16b,v1.16b
- aese v0.16b,v19.16b
- aese v1.16b,v19.16b
- aesmc v0.16b,v0.16b
- aesmc v1.16b,v1.16b
- aese v0.16b,v20.16b
- aese v1.16b,v20.16b
- aesmc v0.16b,v0.16b
- aesmc v1.16b,v1.16b
- aese v0.16b,v21.16b
- aese v1.16b,v21.16b
- aesmc v0.16b,v0.16b
- aesmc v1.16b,v1.16b
- aese v0.16b,v22.16b
- aese v1.16b,v22.16b
- aesmc v0.16b,v0.16b
- aesmc v1.16b,v1.16b
- eor v2.16b,v2.16b,v7.16b
- aese v0.16b,v23.16b
- eor v3.16b,v3.16b,v7.16b
- aese v1.16b,v23.16b
-
- eor v2.16b,v2.16b,v0.16b
- orr v0.16b,v6.16b,v6.16b
- eor v3.16b,v3.16b,v1.16b
- orr v1.16b,v6.16b,v6.16b
- st1 {v2.16b},[x1],#16
- mov v0.s[3], w9
- st1 {v3.16b},[x1],#16
- mov v1.s[3], w10
- b.hs .Loop2x_ctr32_128
-
- adds x2,x2,#2
- b.eq .Lctr32_done
-
-.Lctr32_tail:
- aese v0.16b,v16.16b
- ld1 {v16.4s},[x7],#16
- aesmc v0.16b,v0.16b
- subs w6,w6,#2
- aese v0.16b,v17.16b
- ld1 {v17.4s},[x7],#16
- aesmc v0.16b,v0.16b
- b.gt .Lctr32_tail
-
- aese v0.16b,v16.16b
- aesmc v0.16b,v0.16b
- aese v0.16b,v17.16b
- aesmc v0.16b,v0.16b
- ld1 {v2.16b},[x0]
- aese v0.16b,v18.16b
- aesmc v0.16b,v0.16b
- aese v0.16b,v19.16b
- aesmc v0.16b,v0.16b
- aese v0.16b,v20.16b
- aesmc v0.16b,v0.16b
- aese v0.16b,v21.16b
- aesmc v0.16b,v0.16b
- aese v0.16b,v22.16b
- aesmc v0.16b,v0.16b
- eor v2.16b,v2.16b,v7.16b
- aese v0.16b,v23.16b
-
- eor v2.16b,v2.16b,v0.16b
- st1 {v2.16b},[x1]
-
-.Lctr32_done:
- ldr x29,[sp],#16
- ret
-.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
-#endif