diff options
Diffstat (limited to 'main/openssl/crypto/bn/asm/bn-mips.S')
-rw-r--r-- | main/openssl/crypto/bn/asm/bn-mips.S | 2175 |
1 files changed, 2175 insertions, 0 deletions
diff --git a/main/openssl/crypto/bn/asm/bn-mips.S b/main/openssl/crypto/bn/asm/bn-mips.S new file mode 100644 index 00000000..2e7cccb7 --- /dev/null +++ b/main/openssl/crypto/bn/asm/bn-mips.S @@ -0,0 +1,2175 @@ +.set mips2 +.rdata +.asciiz "mips3.s, Version 1.2" +.asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" + +.text +.set noat + +.align 5 +.globl bn_mul_add_words +.ent bn_mul_add_words +bn_mul_add_words: + .set noreorder + bgtz $6,bn_mul_add_words_internal + move $2,$0 + jr $31 + move $4,$2 +.end bn_mul_add_words + +.align 5 +.ent bn_mul_add_words_internal +bn_mul_add_words_internal: + .set reorder + li $3,-4 + and $8,$6,$3 + beqz $8,.L_bn_mul_add_words_tail + +.L_bn_mul_add_words_loop: + lw $12,0($5) + multu $12,$7 + lw $13,0($4) + lw $14,4($5) + lw $15,4($4) + lw $8,2*4($5) + lw $9,2*4($4) + addu $13,$2 + sltu $2,$13,$2 # All manuals say it "compares 32-bit + # values", but it seems to work fine + # even on 64-bit registers. + mflo $1 + mfhi $12 + addu $13,$1 + addu $2,$12 + multu $14,$7 + sltu $1,$13,$1 + sw $13,0($4) + addu $2,$1 + + lw $10,3*4($5) + lw $11,3*4($4) + addu $15,$2 + sltu $2,$15,$2 + mflo $1 + mfhi $14 + addu $15,$1 + addu $2,$14 + multu $8,$7 + sltu $1,$15,$1 + sw $15,4($4) + addu $2,$1 + + subu $6,4 + addu $4,4*4 + addu $5,4*4 + addu $9,$2 + sltu $2,$9,$2 + mflo $1 + mfhi $8 + addu $9,$1 + addu $2,$8 + multu $10,$7 + sltu $1,$9,$1 + sw $9,-2*4($4) + addu $2,$1 + + + and $8,$6,$3 + addu $11,$2 + sltu $2,$11,$2 + mflo $1 + mfhi $10 + addu $11,$1 + addu $2,$10 + sltu $1,$11,$1 + sw $11,-4($4) + .set noreorder + bgtz $8,.L_bn_mul_add_words_loop + addu $2,$1 + + beqz $6,.L_bn_mul_add_words_return + nop + +.L_bn_mul_add_words_tail: + .set reorder + lw $12,0($5) + multu $12,$7 + lw $13,0($4) + subu $6,1 + addu $13,$2 + sltu $2,$13,$2 + mflo $1 + mfhi $12 + addu $13,$1 + addu $2,$12 + sltu $1,$13,$1 + sw $13,0($4) + addu $2,$1 + beqz $6,.L_bn_mul_add_words_return + + lw $12,4($5) + multu $12,$7 + lw $13,4($4) + subu $6,1 + addu $13,$2 + sltu $2,$13,$2 + mflo $1 + mfhi $12 + addu $13,$1 + addu $2,$12 + sltu $1,$13,$1 + sw $13,4($4) + addu $2,$1 + beqz $6,.L_bn_mul_add_words_return + + lw $12,2*4($5) + multu $12,$7 + lw $13,2*4($4) + addu $13,$2 + sltu $2,$13,$2 + mflo $1 + mfhi $12 + addu $13,$1 + addu $2,$12 + sltu $1,$13,$1 + sw $13,2*4($4) + addu $2,$1 + +.L_bn_mul_add_words_return: + .set noreorder + jr $31 + move $4,$2 +.end bn_mul_add_words_internal + +.align 5 +.globl bn_mul_words +.ent bn_mul_words +bn_mul_words: + .set noreorder + bgtz $6,bn_mul_words_internal + move $2,$0 + jr $31 + move $4,$2 +.end bn_mul_words + +.align 5 +.ent bn_mul_words_internal +bn_mul_words_internal: + .set reorder + li $3,-4 + and $8,$6,$3 + beqz $8,.L_bn_mul_words_tail + +.L_bn_mul_words_loop: + lw $12,0($5) + multu $12,$7 + lw $14,4($5) + lw $8,2*4($5) + lw $10,3*4($5) + mflo $1 + mfhi $12 + addu $2,$1 + sltu $13,$2,$1 + multu $14,$7 + sw $2,0($4) + addu $2,$13,$12 + + subu $6,4 + addu $4,4*4 + addu $5,4*4 + mflo $1 + mfhi $14 + addu $2,$1 + sltu $15,$2,$1 + multu $8,$7 + sw $2,-3*4($4) + addu $2,$15,$14 + + mflo $1 + mfhi $8 + addu $2,$1 + sltu $9,$2,$1 + multu $10,$7 + sw $2,-2*4($4) + addu $2,$9,$8 + + and $8,$6,$3 + mflo $1 + mfhi $10 + addu $2,$1 + sltu $11,$2,$1 + sw $2,-4($4) + .set noreorder + bgtz $8,.L_bn_mul_words_loop + addu $2,$11,$10 + + beqz $6,.L_bn_mul_words_return + nop + +.L_bn_mul_words_tail: + .set reorder + lw $12,0($5) + multu $12,$7 + subu $6,1 + mflo $1 + mfhi $12 + addu $2,$1 + sltu $13,$2,$1 + sw $2,0($4) + addu $2,$13,$12 + beqz $6,.L_bn_mul_words_return + + lw $12,4($5) + multu $12,$7 + subu $6,1 + mflo $1 + mfhi $12 + addu $2,$1 + sltu $13,$2,$1 + sw $2,4($4) + addu $2,$13,$12 + beqz $6,.L_bn_mul_words_return + + lw $12,2*4($5) + multu $12,$7 + mflo $1 + mfhi $12 + addu $2,$1 + sltu $13,$2,$1 + sw $2,2*4($4) + addu $2,$13,$12 + +.L_bn_mul_words_return: + .set noreorder + jr $31 + move $4,$2 +.end bn_mul_words_internal + +.align 5 +.globl bn_sqr_words +.ent bn_sqr_words +bn_sqr_words: + .set noreorder + bgtz $6,bn_sqr_words_internal + move $2,$0 + jr $31 + move $4,$2 +.end bn_sqr_words + +.align 5 +.ent bn_sqr_words_internal +bn_sqr_words_internal: + .set reorder + li $3,-4 + and $8,$6,$3 + beqz $8,.L_bn_sqr_words_tail + +.L_bn_sqr_words_loop: + lw $12,0($5) + multu $12,$12 + lw $14,4($5) + lw $8,2*4($5) + lw $10,3*4($5) + mflo $13 + mfhi $12 + sw $13,0($4) + sw $12,4($4) + + multu $14,$14 + subu $6,4 + addu $4,8*4 + addu $5,4*4 + mflo $15 + mfhi $14 + sw $15,-6*4($4) + sw $14,-5*4($4) + + multu $8,$8 + mflo $9 + mfhi $8 + sw $9,-4*4($4) + sw $8,-3*4($4) + + + multu $10,$10 + and $8,$6,$3 + mflo $11 + mfhi $10 + sw $11,-2*4($4) + + .set noreorder + bgtz $8,.L_bn_sqr_words_loop + sw $10,-4($4) + + beqz $6,.L_bn_sqr_words_return + nop + +.L_bn_sqr_words_tail: + .set reorder + lw $12,0($5) + multu $12,$12 + subu $6,1 + mflo $13 + mfhi $12 + sw $13,0($4) + sw $12,4($4) + beqz $6,.L_bn_sqr_words_return + + lw $12,4($5) + multu $12,$12 + subu $6,1 + mflo $13 + mfhi $12 + sw $13,2*4($4) + sw $12,3*4($4) + beqz $6,.L_bn_sqr_words_return + + lw $12,2*4($5) + multu $12,$12 + mflo $13 + mfhi $12 + sw $13,4*4($4) + sw $12,5*4($4) + +.L_bn_sqr_words_return: + .set noreorder + jr $31 + move $4,$2 + +.end bn_sqr_words_internal + +.align 5 +.globl bn_add_words +.ent bn_add_words +bn_add_words: + .set noreorder + bgtz $7,bn_add_words_internal + move $2,$0 + jr $31 + move $4,$2 +.end bn_add_words + +.align 5 +.ent bn_add_words_internal +bn_add_words_internal: + .set reorder + li $3,-4 + and $1,$7,$3 + beqz $1,.L_bn_add_words_tail + +.L_bn_add_words_loop: + lw $12,0($5) + lw $8,0($6) + subu $7,4 + lw $13,4($5) + and $1,$7,$3 + lw $14,2*4($5) + addu $6,4*4 + lw $15,3*4($5) + addu $4,4*4 + lw $9,-3*4($6) + addu $5,4*4 + lw $10,-2*4($6) + lw $11,-4($6) + addu $8,$12 + sltu $24,$8,$12 + addu $12,$8,$2 + sltu $2,$12,$8 + sw $12,-4*4($4) + addu $2,$24 + + addu $9,$13 + sltu $25,$9,$13 + addu $13,$9,$2 + sltu $2,$13,$9 + sw $13,-3*4($4) + addu $2,$25 + + addu $10,$14 + sltu $24,$10,$14 + addu $14,$10,$2 + sltu $2,$14,$10 + sw $14,-2*4($4) + addu $2,$24 + + addu $11,$15 + sltu $25,$11,$15 + addu $15,$11,$2 + sltu $2,$15,$11 + sw $15,-4($4) + + .set noreorder + bgtz $1,.L_bn_add_words_loop + addu $2,$25 + + beqz $7,.L_bn_add_words_return + nop + +.L_bn_add_words_tail: + .set reorder + lw $12,0($5) + lw $8,0($6) + addu $8,$12 + subu $7,1 + sltu $24,$8,$12 + addu $12,$8,$2 + sltu $2,$12,$8 + sw $12,0($4) + addu $2,$24 + beqz $7,.L_bn_add_words_return + + lw $13,4($5) + lw $9,4($6) + addu $9,$13 + subu $7,1 + sltu $25,$9,$13 + addu $13,$9,$2 + sltu $2,$13,$9 + sw $13,4($4) + addu $2,$25 + beqz $7,.L_bn_add_words_return + + lw $14,2*4($5) + lw $10,2*4($6) + addu $10,$14 + sltu $24,$10,$14 + addu $14,$10,$2 + sltu $2,$14,$10 + sw $14,2*4($4) + addu $2,$24 + +.L_bn_add_words_return: + .set noreorder + jr $31 + move $4,$2 + +.end bn_add_words_internal + +.align 5 +.globl bn_sub_words +.ent bn_sub_words +bn_sub_words: + .set noreorder + bgtz $7,bn_sub_words_internal + move $2,$0 + jr $31 + move $4,$0 +.end bn_sub_words + +.align 5 +.ent bn_sub_words_internal +bn_sub_words_internal: + .set reorder + li $3,-4 + and $1,$7,$3 + beqz $1,.L_bn_sub_words_tail + +.L_bn_sub_words_loop: + lw $12,0($5) + lw $8,0($6) + subu $7,4 + lw $13,4($5) + and $1,$7,$3 + lw $14,2*4($5) + addu $6,4*4 + lw $15,3*4($5) + addu $4,4*4 + lw $9,-3*4($6) + addu $5,4*4 + lw $10,-2*4($6) + lw $11,-4($6) + sltu $24,$12,$8 + subu $8,$12,$8 + subu $12,$8,$2 + sgtu $2,$12,$8 + sw $12,-4*4($4) + addu $2,$24 + + sltu $25,$13,$9 + subu $9,$13,$9 + subu $13,$9,$2 + sgtu $2,$13,$9 + sw $13,-3*4($4) + addu $2,$25 + + + sltu $24,$14,$10 + subu $10,$14,$10 + subu $14,$10,$2 + sgtu $2,$14,$10 + sw $14,-2*4($4) + addu $2,$24 + + sltu $25,$15,$11 + subu $11,$15,$11 + subu $15,$11,$2 + sgtu $2,$15,$11 + sw $15,-4($4) + + .set noreorder + bgtz $1,.L_bn_sub_words_loop + addu $2,$25 + + beqz $7,.L_bn_sub_words_return + nop + +.L_bn_sub_words_tail: + .set reorder + lw $12,0($5) + lw $8,0($6) + subu $7,1 + sltu $24,$12,$8 + subu $8,$12,$8 + subu $12,$8,$2 + sgtu $2,$12,$8 + sw $12,0($4) + addu $2,$24 + beqz $7,.L_bn_sub_words_return + + lw $13,4($5) + subu $7,1 + lw $9,4($6) + sltu $25,$13,$9 + subu $9,$13,$9 + subu $13,$9,$2 + sgtu $2,$13,$9 + sw $13,4($4) + addu $2,$25 + beqz $7,.L_bn_sub_words_return + + lw $14,2*4($5) + lw $10,2*4($6) + sltu $24,$14,$10 + subu $10,$14,$10 + subu $14,$10,$2 + sgtu $2,$14,$10 + sw $14,2*4($4) + addu $2,$24 + +.L_bn_sub_words_return: + .set noreorder + jr $31 + move $4,$2 +.end bn_sub_words_internal + +.align 5 +.globl bn_div_3_words +.ent bn_div_3_words +bn_div_3_words: + .set noreorder + move $7,$4 # we know that bn_div_words does not + # touch $7, $10, $11 and preserves $6 + # so that we can save two arguments + # and return address in registers + # instead of stack:-) + + lw $4,($7) + move $10,$5 + bne $4,$6,bn_div_3_words_internal + lw $5,-4($7) + li $2,-1 + jr $31 + move $4,$2 +.end bn_div_3_words + +.align 5 +.ent bn_div_3_words_internal +bn_div_3_words_internal: + .set reorder + move $11,$31 + bal bn_div_words_internal + move $31,$11 + multu $10,$2 + lw $14,-2*4($7) + move $8,$0 + mfhi $13 + mflo $12 + sltu $24,$13,$5 +.L_bn_div_3_words_inner_loop: + bnez $24,.L_bn_div_3_words_inner_loop_done + sgeu $1,$14,$12 + seq $25,$13,$5 + and $1,$25 + sltu $15,$12,$10 + addu $5,$6 + subu $13,$15 + subu $12,$10 + sltu $24,$13,$5 + sltu $8,$5,$6 + or $24,$8 + .set noreorder + beqz $1,.L_bn_div_3_words_inner_loop + subu $2,1 + addu $2,1 + .set reorder +.L_bn_div_3_words_inner_loop_done: + .set noreorder + jr $31 + move $4,$2 +.end bn_div_3_words_internal + +.align 5 +.globl bn_div_words +.ent bn_div_words +bn_div_words: + .set noreorder + bnez $6,bn_div_words_internal + li $2,-1 # I would rather signal div-by-zero + # which can be done with 'break 7' + jr $31 + move $4,$2 +.end bn_div_words + +.align 5 +.ent bn_div_words_internal +bn_div_words_internal: + move $3,$0 + bltz $6,.L_bn_div_words_body + move $25,$3 + sll $6,1 + bgtz $6,.-4 + addu $25,1 + + .set reorder + negu $13,$25 + li $14,-1 + sll $14,$13 + and $14,$4 + srl $1,$5,$13 + .set noreorder + beqz $14,.+12 + nop + break 6 # signal overflow + .set reorder + sll $4,$25 + sll $5,$25 + or $4,$1 +.L_bn_div_words_body: + srl $3,$6,4*4 # bits + sgeu $1,$4,$6 + .set noreorder + beqz $1,.+12 + nop + subu $4,$6 + .set reorder + + li $8,-1 + srl $9,$4,4*4 # bits + srl $8,4*4 # q=0xffffffff + beq $3,$9,.L_bn_div_words_skip_div1 + divu $0,$4,$3 + mflo $8 +.L_bn_div_words_skip_div1: + multu $6,$8 + sll $15,$4,4*4 # bits + srl $1,$5,4*4 # bits + or $15,$1 + mflo $12 + mfhi $13 +.L_bn_div_words_inner_loop1: + sltu $14,$15,$12 + seq $24,$9,$13 + sltu $1,$9,$13 + and $14,$24 + sltu $2,$12,$6 + or $1,$14 + .set noreorder + beqz $1,.L_bn_div_words_inner_loop1_done + subu $13,$2 + subu $12,$6 + b .L_bn_div_words_inner_loop1 + subu $8,1 + .set reorder +.L_bn_div_words_inner_loop1_done: + + sll $5,4*4 # bits + subu $4,$15,$12 + sll $2,$8,4*4 # bits + + li $8,-1 + srl $9,$4,4*4 # bits + srl $8,4*4 # q=0xffffffff + beq $3,$9,.L_bn_div_words_skip_div2 + divu $0,$4,$3 + mflo $8 +.L_bn_div_words_skip_div2: + multu $6,$8 + sll $15,$4,4*4 # bits + srl $1,$5,4*4 # bits + or $15,$1 + mflo $12 + mfhi $13 +.L_bn_div_words_inner_loop2: + sltu $14,$15,$12 + seq $24,$9,$13 + sltu $1,$9,$13 + and $14,$24 + sltu $3,$12,$6 + or $1,$14 + .set noreorder + beqz $1,.L_bn_div_words_inner_loop2_done + subu $13,$3 + subu $12,$6 + b .L_bn_div_words_inner_loop2 + subu $8,1 + .set reorder +.L_bn_div_words_inner_loop2_done: + + subu $4,$15,$12 + or $2,$8 + srl $3,$4,$25 # $3 contains remainder if anybody wants it + srl $6,$25 # restore $6 + + .set noreorder + move $5,$3 + jr $31 + move $4,$2 +.end bn_div_words_internal + +.align 5 +.globl bn_mul_comba8 +.ent bn_mul_comba8 +bn_mul_comba8: + .set noreorder + .frame $29,6*4,$31 + .mask 0x003f0000,-4 + subu $29,6*4 + sw $21,5*4($29) + sw $20,4*4($29) + sw $19,3*4($29) + sw $18,2*4($29) + sw $17,1*4($29) + sw $16,0*4($29) + + .set reorder + lw $12,0($5) # If compiled with -mips3 option on + # R5000 box assembler barks on this + # 1ine with "should not have mult/div + # as last instruction in bb (R10K + # bug)" warning. If anybody out there + # has a clue about how to circumvent + # this do send me a note. + # <appro@fy.chalmers.se> + + lw $8,0($6) + lw $13,4($5) + lw $14,2*4($5) + multu $12,$8 # mul_add_c(a[0],b[0],c1,c2,c3); + lw $15,3*4($5) + lw $9,4($6) + lw $10,2*4($6) + lw $11,3*4($6) + mflo $2 + mfhi $3 + + lw $16,4*4($5) + lw $18,5*4($5) + multu $12,$9 # mul_add_c(a[0],b[1],c2,c3,c1); + lw $20,6*4($5) + lw $5,7*4($5) + lw $17,4*4($6) + lw $19,5*4($6) + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $13,$8 # mul_add_c(a[1],b[0],c2,c3,c1); + addu $7,$25,$1 + lw $21,6*4($6) + lw $6,7*4($6) + sw $2,0($4) # r[0]=c1; + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $14,$8 # mul_add_c(a[2],b[0],c3,c1,c2); + addu $25,$1 + addu $7,$25 + sltu $2,$7,$25 + sw $3,4($4) # r[1]=c2; + + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $13,$9 # mul_add_c(a[1],b[1],c3,c1,c2); + addu $25,$1 + addu $2,$25 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $12,$10 # mul_add_c(a[0],b[2],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $3,$2,$25 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $12,$11 # mul_add_c(a[0],b[3],c1,c2,c3); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + sw $7,2*4($4) # r[2]=c3; + + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $13,$10 # mul_add_c(a[1],b[2],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $7,$3,$25 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $14,$9 # mul_add_c(a[2],b[1],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $15,$8 # mul_add_c(a[3],b[0],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $16,$8 # mul_add_c(a[4],b[0],c2,c3,c1); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + sw $2,3*4($4) # r[3]=c1; + + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $15,$9 # mul_add_c(a[3],b[1],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $2,$7,$25 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $14,$10 # mul_add_c(a[2],b[2],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $13,$11 # mul_add_c(a[1],b[3],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $12,$17 # mul_add_c(a[0],b[4],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $12,$19 # mul_add_c(a[0],b[5],c3,c1,c2); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + sw $3,4*4($4) # r[4]=c2; + + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $13,$17 # mul_add_c(a[1],b[4],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $3,$2,$25 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $14,$11 # mul_add_c(a[2],b[3],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $15,$10 # mul_add_c(a[3],b[2],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $16,$9 # mul_add_c(a[4],b[1],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $18,$8 # mul_add_c(a[5],b[0],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $20,$8 # mul_add_c(a[6],b[0],c1,c2,c3); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + sw $7,5*4($4) # r[5]=c3; + + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $18,$9 # mul_add_c(a[5],b[1],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $7,$3,$25 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $16,$10 # mul_add_c(a[4],b[2],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $15,$11 # mul_add_c(a[3],b[3],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $14,$17 # mul_add_c(a[2],b[4],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $13,$19 # mul_add_c(a[1],b[5],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $12,$21 # mul_add_c(a[0],b[6],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $12,$6 # mul_add_c(a[0],b[7],c2,c3,c1); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + sw $2,6*4($4) # r[6]=c1; + + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $13,$21 # mul_add_c(a[1],b[6],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $2,$7,$25 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $14,$19 # mul_add_c(a[2],b[5],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $15,$17 # mul_add_c(a[3],b[4],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $16,$11 # mul_add_c(a[4],b[3],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $18,$10 # mul_add_c(a[5],b[2],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $20,$9 # mul_add_c(a[6],b[1],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $5,$8 # mul_add_c(a[7],b[0],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $5,$9 # mul_add_c(a[7],b[1],c3,c1,c2); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + sw $3,7*4($4) # r[7]=c2; + + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $20,$10 # mul_add_c(a[6],b[2],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $3,$2,$25 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $18,$11 # mul_add_c(a[5],b[3],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $16,$17 # mul_add_c(a[4],b[4],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $15,$19 # mul_add_c(a[3],b[5],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $14,$21 # mul_add_c(a[2],b[6],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $13,$6 # mul_add_c(a[1],b[7],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $14,$6 # mul_add_c(a[2],b[7],c1,c2,c3); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + sw $7,8*4($4) # r[8]=c3; + + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $15,$21 # mul_add_c(a[3],b[6],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $7,$3,$25 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $16,$19 # mul_add_c(a[4],b[5],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $18,$17 # mul_add_c(a[5],b[4],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $20,$11 # mul_add_c(a[6],b[3],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $5,$10 # mul_add_c(a[7],b[2],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $5,$11 # mul_add_c(a[7],b[3],c2,c3,c1); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + sw $2,9*4($4) # r[9]=c1; + + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $20,$17 # mul_add_c(a[6],b[4],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $2,$7,$25 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $18,$19 # mul_add_c(a[5],b[5],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $16,$21 # mul_add_c(a[4],b[6],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $15,$6 # mul_add_c(a[3],b[7],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $16,$6 # mul_add_c(a[4],b[7],c3,c1,c2); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + sw $3,10*4($4) # r[10]=c2; + + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $18,$21 # mul_add_c(a[5],b[6],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $3,$2,$25 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $20,$19 # mul_add_c(a[6],b[5],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $5,$17 # mul_add_c(a[7],b[4],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $5,$19 # mul_add_c(a[7],b[5],c1,c2,c3); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + sw $7,11*4($4) # r[11]=c3; + + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $20,$21 # mul_add_c(a[6],b[6],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $7,$3,$25 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $18,$6 # mul_add_c(a[5],b[7],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $20,$6 # mul_add_c(a[6],b[7],c2,c3,c1); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + sw $2,12*4($4) # r[12]=c1; + + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $5,$21 # mul_add_c(a[7],b[6],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $2,$7,$25 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $5,$6 # mul_add_c(a[7],b[7],c3,c1,c2); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + sw $3,13*4($4) # r[13]=c2; + + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sw $7,14*4($4) # r[14]=c3; + sw $2,15*4($4) # r[15]=c1; + + .set noreorder + lw $21,5*4($29) + lw $20,4*4($29) + lw $19,3*4($29) + lw $18,2*4($29) + lw $17,1*4($29) + lw $16,0*4($29) + jr $31 + addu $29,6*4 +.end bn_mul_comba8 + +.align 5 +.globl bn_mul_comba4 +.ent bn_mul_comba4 +bn_mul_comba4: + .set reorder + lw $12,0($5) + lw $8,0($6) + lw $13,4($5) + lw $14,2*4($5) + multu $12,$8 # mul_add_c(a[0],b[0],c1,c2,c3); + lw $15,3*4($5) + lw $9,4($6) + lw $10,2*4($6) + lw $11,3*4($6) + mflo $2 + mfhi $3 + sw $2,0($4) + + multu $12,$9 # mul_add_c(a[0],b[1],c2,c3,c1); + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $13,$8 # mul_add_c(a[1],b[0],c2,c3,c1); + addu $7,$25,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $14,$8 # mul_add_c(a[2],b[0],c3,c1,c2); + addu $25,$1 + addu $7,$25 + sltu $2,$7,$25 + sw $3,4($4) + + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $13,$9 # mul_add_c(a[1],b[1],c3,c1,c2); + addu $25,$1 + addu $2,$25 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $12,$10 # mul_add_c(a[0],b[2],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $3,$2,$25 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $12,$11 # mul_add_c(a[0],b[3],c1,c2,c3); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + sw $7,2*4($4) + + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $13,$10 # mul_add_c(a[1],b[2],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $7,$3,$25 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $14,$9 # mul_add_c(a[2],b[1],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $15,$8 # mul_add_c(a[3],b[0],c1,c2,c3); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $15,$9 # mul_add_c(a[3],b[1],c2,c3,c1); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + sw $2,3*4($4) + + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $14,$10 # mul_add_c(a[2],b[2],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $2,$7,$25 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $13,$11 # mul_add_c(a[1],b[3],c2,c3,c1); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $14,$11 # mul_add_c(a[2],b[3],c3,c1,c2); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + sw $3,4*4($4) + + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $15,$10 # mul_add_c(a[3],b[2],c3,c1,c2); + addu $25,$1 + addu $2,$25 + sltu $3,$2,$25 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $15,$11 # mul_add_c(a[3],b[3],c1,c2,c3); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + sw $7,5*4($4) + + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sw $2,6*4($4) + sw $3,7*4($4) + + .set noreorder + jr $31 + nop +.end bn_mul_comba4 + +.align 5 +.globl bn_sqr_comba8 +.ent bn_sqr_comba8 +bn_sqr_comba8: + .set reorder + lw $12,0($5) + lw $13,4($5) + lw $14,2*4($5) + lw $15,3*4($5) + + multu $12,$12 # mul_add_c(a[0],b[0],c1,c2,c3); + lw $8,4*4($5) + lw $9,5*4($5) + lw $10,6*4($5) + lw $11,7*4($5) + mflo $2 + mfhi $3 + sw $2,0($4) + + multu $12,$13 # mul_add_c2(a[0],b[1],c2,c3,c1); + mflo $24 + mfhi $25 + slt $2,$25,$0 + sll $25,1 + multu $14,$12 # mul_add_c2(a[2],b[0],c3,c1,c2); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $3,$24 + sltu $1,$3,$24 + addu $7,$25,$1 + sw $3,4($4) + + mflo $24 + mfhi $25 + slt $3,$25,$0 + sll $25,1 + multu $13,$13 # mul_add_c(a[1],b[1],c3,c1,c2); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $12,$15 # mul_add_c2(a[0],b[3],c1,c2,c3); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + sw $7,2*4($4) + + mflo $24 + mfhi $25 + slt $7,$25,$0 + sll $25,1 + multu $13,$14 # mul_add_c2(a[1],b[2],c1,c2,c3); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $7,$1 + multu $8,$12 # mul_add_c2(a[4],b[0],c2,c3,c1); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + sw $2,3*4($4) + + mflo $24 + mfhi $25 + slt $2,$25,$0 + sll $25,1 + multu $15,$13 # mul_add_c2(a[3],b[1],c2,c3,c1); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $3,$24 + sltu $1,$3,$24 + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $2,$1 + multu $14,$14 # mul_add_c(a[2],b[2],c2,c3,c1); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $3,$24 + sltu $1,$3,$24 + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $12,$9 # mul_add_c2(a[0],b[5],c3,c1,c2); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + sw $3,4*4($4) + + mflo $24 + mfhi $25 + slt $3,$25,$0 + sll $25,1 + multu $13,$8 # mul_add_c2(a[1],b[4],c3,c1,c2); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $3,$1 + multu $14,$15 # mul_add_c2(a[2],b[3],c3,c1,c2); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + multu $10,$12 # mul_add_c2(a[6],b[0],c1,c2,c3); + addu $3,$1 + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + sw $7,5*4($4) + + mflo $24 + mfhi $25 + slt $7,$25,$0 + sll $25,1 + multu $9,$13 # mul_add_c2(a[5],b[1],c1,c2,c3); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $7,$1 + multu $8,$14 # mul_add_c2(a[4],b[2],c1,c2,c3); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $7,$1 + multu $15,$15 # mul_add_c(a[3],b[3],c1,c2,c3); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $12,$11 # mul_add_c2(a[0],b[7],c2,c3,c1); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + sw $2,6*4($4) + + mflo $24 + mfhi $25 + slt $2,$25,$0 + sll $25,1 + multu $13,$10 # mul_add_c2(a[1],b[6],c2,c3,c1); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $3,$24 + sltu $1,$3,$24 + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $2,$1 + multu $14,$9 # mul_add_c2(a[2],b[5],c2,c3,c1); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $3,$24 + sltu $1,$3,$24 + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $2,$1 + multu $15,$8 # mul_add_c2(a[3],b[4],c2,c3,c1); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $3,$24 + sltu $1,$3,$24 + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $2,$1 + multu $11,$13 # mul_add_c2(a[7],b[1],c3,c1,c2); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $3,$24 + sltu $1,$3,$24 + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + sw $3,7*4($4) + + mflo $24 + mfhi $25 + slt $3,$25,$0 + sll $25,1 + multu $10,$14 # mul_add_c2(a[6],b[2],c3,c1,c2); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $3,$1 + multu $9,$15 # mul_add_c2(a[5],b[3],c3,c1,c2); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $3,$1 + multu $8,$8 # mul_add_c(a[4],b[4],c3,c1,c2); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $14,$11 # mul_add_c2(a[2],b[7],c1,c2,c3); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + sw $7,8*4($4) + + mflo $24 + mfhi $25 + slt $7,$25,$0 + sll $25,1 + multu $15,$10 # mul_add_c2(a[3],b[6],c1,c2,c3); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $7,$1 + multu $8,$9 # mul_add_c2(a[4],b[5],c1,c2,c3); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $7,$1 + multu $11,$15 # mul_add_c2(a[7],b[3],c2,c3,c1); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + sw $2,9*4($4) + + mflo $24 + mfhi $25 + slt $2,$25,$0 + sll $25,1 + multu $10,$8 # mul_add_c2(a[6],b[4],c2,c3,c1); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $3,$24 + sltu $1,$3,$24 + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $2,$1 + multu $9,$9 # mul_add_c(a[5],b[5],c2,c3,c1); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $3,$24 + sltu $1,$3,$24 + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $8,$11 # mul_add_c2(a[4],b[7],c3,c1,c2); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + sw $3,10*4($4) + + mflo $24 + mfhi $25 + slt $3,$25,$0 + sll $25,1 + multu $9,$10 # mul_add_c2(a[5],b[6],c3,c1,c2); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $3,$1 + multu $11,$9 # mul_add_c2(a[7],b[5],c1,c2,c3); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + sw $7,11*4($4) + + mflo $24 + mfhi $25 + slt $7,$25,$0 + sll $25,1 + multu $10,$10 # mul_add_c(a[6],b[6],c1,c2,c3); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + multu $10,$11 # mul_add_c2(a[6],b[7],c2,c3,c1); + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + sw $2,12*4($4) + + mflo $24 + mfhi $25 + slt $2,$25,$0 + sll $25,1 + multu $11,$11 # mul_add_c(a[7],b[7],c3,c1,c2); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $3,$24 + sltu $1,$3,$24 + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + sw $3,13*4($4) + + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sw $7,14*4($4) + sw $2,15*4($4) + + .set noreorder + jr $31 + nop +.end bn_sqr_comba8 + +.align 5 +.globl bn_sqr_comba4 +.ent bn_sqr_comba4 +bn_sqr_comba4: + .set reorder + lw $12,0($5) + lw $13,4($5) + multu $12,$12 # mul_add_c(a[0],b[0],c1,c2,c3); + lw $14,2*4($5) + lw $15,3*4($5) + mflo $2 + mfhi $3 + sw $2,0($4) + + multu $12,$13 # mul_add_c2(a[0],b[1],c2,c3,c1); + mflo $24 + mfhi $25 + slt $2,$25,$0 + sll $25,1 + multu $14,$12 # mul_add_c2(a[2],b[0],c3,c1,c2); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $3,$24 + sltu $1,$3,$24 + addu $7,$25,$1 + sw $3,4($4) + + mflo $24 + mfhi $25 + slt $3,$25,$0 + sll $25,1 + multu $13,$13 # mul_add_c(a[1],b[1],c3,c1,c2); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + mflo $24 + mfhi $25 + addu $7,$24 + sltu $1,$7,$24 + multu $12,$15 # mul_add_c2(a[0],b[3],c1,c2,c3); + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + sw $7,2*4($4) + + mflo $24 + mfhi $25 + slt $7,$25,$0 + sll $25,1 + multu $13,$14 # mul_add_c(a2[1],b[2],c1,c2,c3); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + mflo $24 + mfhi $25 + slt $1,$25,$0 + addu $7,$1 + multu $15,$13 # mul_add_c2(a[3],b[1],c2,c3,c1); + sll $25,1 + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sltu $1,$3,$25 + addu $7,$1 + sw $2,3*4($4) + + mflo $24 + mfhi $25 + slt $2,$25,$0 + sll $25,1 + multu $14,$14 # mul_add_c(a[2],b[2],c2,c3,c1); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $3,$24 + sltu $1,$3,$24 + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + mflo $24 + mfhi $25 + addu $3,$24 + sltu $1,$3,$24 + multu $14,$15 # mul_add_c2(a[2],b[3],c3,c1,c2); + addu $25,$1 + addu $7,$25 + sltu $1,$7,$25 + addu $2,$1 + sw $3,4*4($4) + + mflo $24 + mfhi $25 + slt $3,$25,$0 + sll $25,1 + multu $15,$15 # mul_add_c(a[3],b[3],c1,c2,c3); + slt $6,$24,$0 + addu $25,$6 + sll $24,1 + addu $7,$24 + sltu $1,$7,$24 + addu $25,$1 + addu $2,$25 + sltu $1,$2,$25 + addu $3,$1 + sw $7,5*4($4) + + mflo $24 + mfhi $25 + addu $2,$24 + sltu $1,$2,$24 + addu $25,$1 + addu $3,$25 + sw $2,6*4($4) + sw $3,7*4($4) + + .set noreorder + jr $31 + nop +.end bn_sqr_comba4 |