diff options
Diffstat (limited to 'app/openssl/crypto/bn/asm/mips-mont.S')
-rw-r--r-- | app/openssl/crypto/bn/asm/mips-mont.S | 284 |
1 files changed, 284 insertions, 0 deletions
diff --git a/app/openssl/crypto/bn/asm/mips-mont.S b/app/openssl/crypto/bn/asm/mips-mont.S new file mode 100644 index 00000000..1b875a2a --- /dev/null +++ b/app/openssl/crypto/bn/asm/mips-mont.S @@ -0,0 +1,284 @@ +.text + +.set noat +.set noreorder + +.align 5 +.globl bn_mul_mont +.ent bn_mul_mont +bn_mul_mont: + lw $8,16($29) + lw $9,20($29) + slt $1,$9,4 + bnez $1,1f + li $2,0 + slt $1,$9,17 # on in-order CPU + bnez $1,bn_mul_mont_internal + nop +1: jr $31 + li $4,0 +.end bn_mul_mont + +.align 5 +.ent bn_mul_mont_internal +bn_mul_mont_internal: + .frame $30,14*4,$31 + .mask 0x40000000|16711680,-4 + sub $29,14*4 + sw $30,(14-1)*4($29) + sw $23,(14-2)*4($29) + sw $22,(14-3)*4($29) + sw $21,(14-4)*4($29) + sw $20,(14-5)*4($29) + sw $19,(14-6)*4($29) + sw $18,(14-7)*4($29) + sw $17,(14-8)*4($29) + sw $16,(14-9)*4($29) + move $30,$29 + + .set reorder + lw $8,0($8) + lw $13,0($6) # bp[0] + lw $12,0($5) # ap[0] + lw $14,0($7) # np[0] + + sub $29,2*4 # place for two extra words + sll $9,2 + li $1,-4096 + sub $29,$9 + and $29,$1 + + multu $12,$13 + lw $16,4($5) + lw $18,4($7) + mflo $10 + mfhi $11 + multu $10,$8 + mflo $23 + + multu $16,$13 + mflo $16 + mfhi $17 + + multu $14,$23 + mflo $24 + mfhi $25 + multu $18,$23 + addu $24,$10 + sltu $1,$24,$10 + addu $25,$1 + mflo $18 + mfhi $19 + + move $15,$29 + li $22,2*4 +.align 4 +.L1st: + .set noreorder + add $12,$5,$22 + add $14,$7,$22 + lw $12,($12) + lw $14,($14) + + multu $12,$13 + addu $10,$16,$11 + addu $24,$18,$25 + sltu $1,$10,$11 + sltu $2,$24,$25 + addu $11,$17,$1 + addu $25,$19,$2 + mflo $16 + mfhi $17 + + addu $24,$10 + sltu $1,$24,$10 + multu $14,$23 + addu $25,$1 + addu $22,4 + sw $24,($15) + sltu $2,$22,$9 + mflo $18 + mfhi $19 + + bnez $2,.L1st + add $15,4 + .set reorder + + addu $10,$16,$11 + sltu $1,$10,$11 + addu $11,$17,$1 + + addu $24,$18,$25 + sltu $2,$24,$25 + addu $25,$19,$2 + addu $24,$10 + sltu $1,$24,$10 + addu $25,$1 + + sw $24,($15) + + addu $25,$11 + sltu $1,$25,$11 + sw $25,4($15) + sw $1,2*4($15) + + li $21,4 +.align 4 +.Louter: + add $13,$6,$21 + lw $13,($13) + lw $12,($5) + lw $16,4($5) + lw $20,($29) + + multu $12,$13 + lw $14,($7) + lw $18,4($7) + mflo $10 + mfhi $11 + addu $10,$20 + multu $10,$8 + sltu $1,$10,$20 + addu $11,$1 + mflo $23 + + multu $16,$13 + mflo $16 + mfhi $17 + + multu $14,$23 + mflo $24 + mfhi $25 + + multu $18,$23 + addu $24,$10 + sltu $1,$24,$10 + addu $25,$1 + mflo $18 + mfhi $19 + + move $15,$29 + li $22,2*4 + lw $20,4($15) +.align 4 +.Linner: + .set noreorder + add $12,$5,$22 + add $14,$7,$22 + lw $12,($12) + lw $14,($14) + + multu $12,$13 + addu $10,$16,$11 + addu $24,$18,$25 + sltu $1,$10,$11 + sltu $2,$24,$25 + addu $11,$17,$1 + addu $25,$19,$2 + mflo $16 + mfhi $17 + + addu $10,$20 + addu $22,4 + multu $14,$23 + sltu $1,$10,$20 + addu $24,$10 + addu $11,$1 + sltu $2,$24,$10 + lw $20,2*4($15) + addu $25,$2 + sltu $1,$22,$9 + mflo $18 + mfhi $19 + sw $24,($15) + bnez $1,.Linner + add $15,4 + .set reorder + + addu $10,$16,$11 + sltu $1,$10,$11 + addu $11,$17,$1 + addu $10,$20 + sltu $2,$10,$20 + addu $11,$2 + + lw $20,2*4($15) + addu $24,$18,$25 + sltu $1,$24,$25 + addu $25,$19,$1 + addu $24,$10 + sltu $2,$24,$10 + addu $25,$2 + sw $24,($15) + + addu $24,$25,$11 + sltu $25,$24,$11 + addu $24,$20 + sltu $1,$24,$20 + addu $25,$1 + sw $24,4($15) + sw $25,2*4($15) + + addu $21,4 + sltu $2,$21,$9 + bnez $2,.Louter + + .set noreorder + add $20,$29,$9 # &tp[num] + move $15,$29 + move $5,$29 + li $11,0 # clear borrow bit + +.align 4 +.Lsub: lw $10,($15) + lw $24,($7) + add $15,4 + add $7,4 + subu $24,$10,$24 # tp[i]-np[i] + sgtu $1,$24,$10 + subu $10,$24,$11 + sgtu $11,$10,$24 + sw $10,($4) + or $11,$1 + sltu $1,$15,$20 + bnez $1,.Lsub + add $4,4 + + subu $11,$25,$11 # handle upmost overflow bit + move $15,$29 + sub $4,$9 # restore rp + not $25,$11 + + and $5,$11,$29 + and $6,$25,$4 + or $5,$5,$6 # ap=borrow?tp:rp + +.align 4 +.Lcopy: lw $12,($5) + add $5,4 + sw $0,($15) + add $15,4 + sltu $1,$15,$20 + sw $12,($4) + bnez $1,.Lcopy + add $4,4 + + li $4,1 + li $2,1 + + .set noreorder + move $29,$30 + lw $30,(14-1)*4($29) + lw $23,(14-2)*4($29) + lw $22,(14-3)*4($29) + lw $21,(14-4)*4($29) + lw $20,(14-5)*4($29) + lw $19,(14-6)*4($29) + lw $18,(14-7)*4($29) + lw $17,(14-8)*4($29) + lw $16,(14-9)*4($29) + jr $31 + add $29,14*4 +.end bn_mul_mont_internal +.rdata +.asciiz "Montgomery Multiplication for MIPS, CRYPTOGAMS by <appro@openssl.org>" |