summaryrefslogtreecommitdiff
path: root/app/openssl/crypto/bn/asm/mips-mont.S
diff options
context:
space:
mode:
Diffstat (limited to 'app/openssl/crypto/bn/asm/mips-mont.S')
-rw-r--r--app/openssl/crypto/bn/asm/mips-mont.S284
1 files changed, 284 insertions, 0 deletions
diff --git a/app/openssl/crypto/bn/asm/mips-mont.S b/app/openssl/crypto/bn/asm/mips-mont.S
new file mode 100644
index 00000000..1b875a2a
--- /dev/null
+++ b/app/openssl/crypto/bn/asm/mips-mont.S
@@ -0,0 +1,284 @@
+.text
+
+.set noat
+.set noreorder
+
+.align 5
+.globl bn_mul_mont
+.ent bn_mul_mont
+bn_mul_mont:
+ lw $8,16($29)
+ lw $9,20($29)
+ slt $1,$9,4
+ bnez $1,1f
+ li $2,0
+ slt $1,$9,17 # on in-order CPU
+ bnez $1,bn_mul_mont_internal
+ nop
+1: jr $31
+ li $4,0
+.end bn_mul_mont
+
+.align 5
+.ent bn_mul_mont_internal
+bn_mul_mont_internal:
+ .frame $30,14*4,$31
+ .mask 0x40000000|16711680,-4
+ sub $29,14*4
+ sw $30,(14-1)*4($29)
+ sw $23,(14-2)*4($29)
+ sw $22,(14-3)*4($29)
+ sw $21,(14-4)*4($29)
+ sw $20,(14-5)*4($29)
+ sw $19,(14-6)*4($29)
+ sw $18,(14-7)*4($29)
+ sw $17,(14-8)*4($29)
+ sw $16,(14-9)*4($29)
+ move $30,$29
+
+ .set reorder
+ lw $8,0($8)
+ lw $13,0($6) # bp[0]
+ lw $12,0($5) # ap[0]
+ lw $14,0($7) # np[0]
+
+ sub $29,2*4 # place for two extra words
+ sll $9,2
+ li $1,-4096
+ sub $29,$9
+ and $29,$1
+
+ multu $12,$13
+ lw $16,4($5)
+ lw $18,4($7)
+ mflo $10
+ mfhi $11
+ multu $10,$8
+ mflo $23
+
+ multu $16,$13
+ mflo $16
+ mfhi $17
+
+ multu $14,$23
+ mflo $24
+ mfhi $25
+ multu $18,$23
+ addu $24,$10
+ sltu $1,$24,$10
+ addu $25,$1
+ mflo $18
+ mfhi $19
+
+ move $15,$29
+ li $22,2*4
+.align 4
+.L1st:
+ .set noreorder
+ add $12,$5,$22
+ add $14,$7,$22
+ lw $12,($12)
+ lw $14,($14)
+
+ multu $12,$13
+ addu $10,$16,$11
+ addu $24,$18,$25
+ sltu $1,$10,$11
+ sltu $2,$24,$25
+ addu $11,$17,$1
+ addu $25,$19,$2
+ mflo $16
+ mfhi $17
+
+ addu $24,$10
+ sltu $1,$24,$10
+ multu $14,$23
+ addu $25,$1
+ addu $22,4
+ sw $24,($15)
+ sltu $2,$22,$9
+ mflo $18
+ mfhi $19
+
+ bnez $2,.L1st
+ add $15,4
+ .set reorder
+
+ addu $10,$16,$11
+ sltu $1,$10,$11
+ addu $11,$17,$1
+
+ addu $24,$18,$25
+ sltu $2,$24,$25
+ addu $25,$19,$2
+ addu $24,$10
+ sltu $1,$24,$10
+ addu $25,$1
+
+ sw $24,($15)
+
+ addu $25,$11
+ sltu $1,$25,$11
+ sw $25,4($15)
+ sw $1,2*4($15)
+
+ li $21,4
+.align 4
+.Louter:
+ add $13,$6,$21
+ lw $13,($13)
+ lw $12,($5)
+ lw $16,4($5)
+ lw $20,($29)
+
+ multu $12,$13
+ lw $14,($7)
+ lw $18,4($7)
+ mflo $10
+ mfhi $11
+ addu $10,$20
+ multu $10,$8
+ sltu $1,$10,$20
+ addu $11,$1
+ mflo $23
+
+ multu $16,$13
+ mflo $16
+ mfhi $17
+
+ multu $14,$23
+ mflo $24
+ mfhi $25
+
+ multu $18,$23
+ addu $24,$10
+ sltu $1,$24,$10
+ addu $25,$1
+ mflo $18
+ mfhi $19
+
+ move $15,$29
+ li $22,2*4
+ lw $20,4($15)
+.align 4
+.Linner:
+ .set noreorder
+ add $12,$5,$22
+ add $14,$7,$22
+ lw $12,($12)
+ lw $14,($14)
+
+ multu $12,$13
+ addu $10,$16,$11
+ addu $24,$18,$25
+ sltu $1,$10,$11
+ sltu $2,$24,$25
+ addu $11,$17,$1
+ addu $25,$19,$2
+ mflo $16
+ mfhi $17
+
+ addu $10,$20
+ addu $22,4
+ multu $14,$23
+ sltu $1,$10,$20
+ addu $24,$10
+ addu $11,$1
+ sltu $2,$24,$10
+ lw $20,2*4($15)
+ addu $25,$2
+ sltu $1,$22,$9
+ mflo $18
+ mfhi $19
+ sw $24,($15)
+ bnez $1,.Linner
+ add $15,4
+ .set reorder
+
+ addu $10,$16,$11
+ sltu $1,$10,$11
+ addu $11,$17,$1
+ addu $10,$20
+ sltu $2,$10,$20
+ addu $11,$2
+
+ lw $20,2*4($15)
+ addu $24,$18,$25
+ sltu $1,$24,$25
+ addu $25,$19,$1
+ addu $24,$10
+ sltu $2,$24,$10
+ addu $25,$2
+ sw $24,($15)
+
+ addu $24,$25,$11
+ sltu $25,$24,$11
+ addu $24,$20
+ sltu $1,$24,$20
+ addu $25,$1
+ sw $24,4($15)
+ sw $25,2*4($15)
+
+ addu $21,4
+ sltu $2,$21,$9
+ bnez $2,.Louter
+
+ .set noreorder
+ add $20,$29,$9 # &tp[num]
+ move $15,$29
+ move $5,$29
+ li $11,0 # clear borrow bit
+
+.align 4
+.Lsub: lw $10,($15)
+ lw $24,($7)
+ add $15,4
+ add $7,4
+ subu $24,$10,$24 # tp[i]-np[i]
+ sgtu $1,$24,$10
+ subu $10,$24,$11
+ sgtu $11,$10,$24
+ sw $10,($4)
+ or $11,$1
+ sltu $1,$15,$20
+ bnez $1,.Lsub
+ add $4,4
+
+ subu $11,$25,$11 # handle upmost overflow bit
+ move $15,$29
+ sub $4,$9 # restore rp
+ not $25,$11
+
+ and $5,$11,$29
+ and $6,$25,$4
+ or $5,$5,$6 # ap=borrow?tp:rp
+
+.align 4
+.Lcopy: lw $12,($5)
+ add $5,4
+ sw $0,($15)
+ add $15,4
+ sltu $1,$15,$20
+ sw $12,($4)
+ bnez $1,.Lcopy
+ add $4,4
+
+ li $4,1
+ li $2,1
+
+ .set noreorder
+ move $29,$30
+ lw $30,(14-1)*4($29)
+ lw $23,(14-2)*4($29)
+ lw $22,(14-3)*4($29)
+ lw $21,(14-4)*4($29)
+ lw $20,(14-5)*4($29)
+ lw $19,(14-6)*4($29)
+ lw $18,(14-7)*4($29)
+ lw $17,(14-8)*4($29)
+ lw $16,(14-9)*4($29)
+ jr $31
+ add $29,14*4
+.end bn_mul_mont_internal
+.rdata
+.asciiz "Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro@openssl.org>"