diff options
Diffstat (limited to 'main/openssl/crypto/bn/asm/bn-mips.S')
m--------- | main/openssl | 0 | ||||
-rw-r--r-- | main/openssl/crypto/bn/asm/bn-mips.S | 2159 |
2 files changed, 0 insertions, 2159 deletions
diff --git a/main/openssl b/main/openssl new file mode 160000 +Subproject 4d377a9ce111930d8a8f06dc0e94a892a7f6c51 diff --git a/main/openssl/crypto/bn/asm/bn-mips.S b/main/openssl/crypto/bn/asm/bn-mips.S deleted file mode 100644 index 62136802..00000000 --- a/main/openssl/crypto/bn/asm/bn-mips.S +++ /dev/null @@ -1,2159 +0,0 @@ -.set mips2 -.rdata -.asciiz "mips3.s, Version 1.2" -.asciiz "MIPS II/III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" - -.text -.set noat - -.align 5 -.globl bn_mul_add_words -.ent bn_mul_add_words -bn_mul_add_words: - .set noreorder - bgtz $6,bn_mul_add_words_internal - move $2,$0 - jr $31 - move $4,$2 -.end bn_mul_add_words - -.align 5 -.ent bn_mul_add_words_internal -bn_mul_add_words_internal: - .set reorder - li $3,-4 - and $8,$6,$3 - beqz $8,.L_bn_mul_add_words_tail - -.L_bn_mul_add_words_loop: - lw $12,0($5) - multu $12,$7 - lw $13,0($4) - lw $14,4($5) - lw $15,4($4) - lw $8,2*4($5) - lw $9,2*4($4) - addu $13,$2 - sltu $2,$13,$2 # All manuals say it "compares 32-bit - # values", but it seems to work fine - # even on 64-bit registers. - mflo $1 - mfhi $12 - addu $13,$1 - addu $2,$12 - multu $14,$7 - sltu $1,$13,$1 - sw $13,0($4) - addu $2,$1 - - lw $10,3*4($5) - lw $11,3*4($4) - addu $15,$2 - sltu $2,$15,$2 - mflo $1 - mfhi $14 - addu $15,$1 - addu $2,$14 - multu $8,$7 - sltu $1,$15,$1 - sw $15,4($4) - addu $2,$1 - - subu $6,4 - addu $4,4*4 - addu $5,4*4 - addu $9,$2 - sltu $2,$9,$2 - mflo $1 - mfhi $8 - addu $9,$1 - addu $2,$8 - multu $10,$7 - sltu $1,$9,$1 - sw $9,-2*4($4) - addu $2,$1 - - - and $8,$6,$3 - addu $11,$2 - sltu $2,$11,$2 - mflo $1 - mfhi $10 - addu $11,$1 - addu $2,$10 - sltu $1,$11,$1 - sw $11,-4($4) - .set noreorder - bgtz $8,.L_bn_mul_add_words_loop - addu $2,$1 - - beqz $6,.L_bn_mul_add_words_return - nop - -.L_bn_mul_add_words_tail: - .set reorder - lw $12,0($5) - multu $12,$7 - lw $13,0($4) - subu $6,1 - addu $13,$2 - sltu $2,$13,$2 - mflo $1 - mfhi $12 - addu $13,$1 - addu $2,$12 - sltu $1,$13,$1 - sw $13,0($4) - addu $2,$1 - beqz $6,.L_bn_mul_add_words_return - - lw $12,4($5) - multu $12,$7 - lw $13,4($4) - subu $6,1 - addu $13,$2 - sltu $2,$13,$2 - mflo $1 - mfhi $12 - addu $13,$1 - addu $2,$12 - sltu $1,$13,$1 - sw $13,4($4) - addu $2,$1 - beqz $6,.L_bn_mul_add_words_return - - lw $12,2*4($5) - multu $12,$7 - lw $13,2*4($4) - addu $13,$2 - sltu $2,$13,$2 - mflo $1 - mfhi $12 - addu $13,$1 - addu $2,$12 - sltu $1,$13,$1 - sw $13,2*4($4) - addu $2,$1 - -.L_bn_mul_add_words_return: - .set noreorder - jr $31 - move $4,$2 -.end bn_mul_add_words_internal - -.align 5 -.globl bn_mul_words -.ent bn_mul_words -bn_mul_words: - .set noreorder - bgtz $6,bn_mul_words_internal - move $2,$0 - jr $31 - move $4,$2 -.end bn_mul_words - -.align 5 -.ent bn_mul_words_internal -bn_mul_words_internal: - .set reorder - li $3,-4 - and $8,$6,$3 - beqz $8,.L_bn_mul_words_tail - -.L_bn_mul_words_loop: - lw $12,0($5) - multu $12,$7 - lw $14,4($5) - lw $8,2*4($5) - lw $10,3*4($5) - mflo $1 - mfhi $12 - addu $2,$1 - sltu $13,$2,$1 - multu $14,$7 - sw $2,0($4) - addu $2,$13,$12 - - subu $6,4 - addu $4,4*4 - addu $5,4*4 - mflo $1 - mfhi $14 - addu $2,$1 - sltu $15,$2,$1 - multu $8,$7 - sw $2,-3*4($4) - addu $2,$15,$14 - - mflo $1 - mfhi $8 - addu $2,$1 - sltu $9,$2,$1 - multu $10,$7 - sw $2,-2*4($4) - addu $2,$9,$8 - - and $8,$6,$3 - mflo $1 - mfhi $10 - addu $2,$1 - sltu $11,$2,$1 - sw $2,-4($4) - .set noreorder - bgtz $8,.L_bn_mul_words_loop - addu $2,$11,$10 - - beqz $6,.L_bn_mul_words_return - nop - -.L_bn_mul_words_tail: - .set reorder - lw $12,0($5) - multu $12,$7 - subu $6,1 - mflo $1 - mfhi $12 - addu $2,$1 - sltu $13,$2,$1 - sw $2,0($4) - addu $2,$13,$12 - beqz $6,.L_bn_mul_words_return - - lw $12,4($5) - multu $12,$7 - subu $6,1 - mflo $1 - mfhi $12 - addu $2,$1 - sltu $13,$2,$1 - sw $2,4($4) - addu $2,$13,$12 - beqz $6,.L_bn_mul_words_return - - lw $12,2*4($5) - multu $12,$7 - mflo $1 - mfhi $12 - addu $2,$1 - sltu $13,$2,$1 - sw $2,2*4($4) - addu $2,$13,$12 - -.L_bn_mul_words_return: - .set noreorder - jr $31 - move $4,$2 -.end bn_mul_words_internal - -.align 5 -.globl bn_sqr_words -.ent bn_sqr_words -bn_sqr_words: - .set noreorder - bgtz $6,bn_sqr_words_internal - move $2,$0 - jr $31 - move $4,$2 -.end bn_sqr_words - -.align 5 -.ent bn_sqr_words_internal -bn_sqr_words_internal: - .set reorder - li $3,-4 - and $8,$6,$3 - beqz $8,.L_bn_sqr_words_tail - -.L_bn_sqr_words_loop: - lw $12,0($5) - multu $12,$12 - lw $14,4($5) - lw $8,2*4($5) - lw $10,3*4($5) - mflo $13 - mfhi $12 - sw $13,0($4) - sw $12,4($4) - - multu $14,$14 - subu $6,4 - addu $4,8*4 - addu $5,4*4 - mflo $15 - mfhi $14 - sw $15,-6*4($4) - sw $14,-5*4($4) - - multu $8,$8 - mflo $9 - mfhi $8 - sw $9,-4*4($4) - sw $8,-3*4($4) - - - multu $10,$10 - and $8,$6,$3 - mflo $11 - mfhi $10 - sw $11,-2*4($4) - - .set noreorder - bgtz $8,.L_bn_sqr_words_loop - sw $10,-4($4) - - beqz $6,.L_bn_sqr_words_return - nop - -.L_bn_sqr_words_tail: - .set reorder - lw $12,0($5) - multu $12,$12 - subu $6,1 - mflo $13 - mfhi $12 - sw $13,0($4) - sw $12,4($4) - beqz $6,.L_bn_sqr_words_return - - lw $12,4($5) - multu $12,$12 - subu $6,1 - mflo $13 - mfhi $12 - sw $13,2*4($4) - sw $12,3*4($4) - beqz $6,.L_bn_sqr_words_return - - lw $12,2*4($5) - multu $12,$12 - mflo $13 - mfhi $12 - sw $13,4*4($4) - sw $12,5*4($4) - -.L_bn_sqr_words_return: - .set noreorder - jr $31 - move $4,$2 - -.end bn_sqr_words_internal - -.align 5 -.globl bn_add_words -.ent bn_add_words -bn_add_words: - .set noreorder - bgtz $7,bn_add_words_internal - move $2,$0 - jr $31 - move $4,$2 -.end bn_add_words - -.align 5 -.ent bn_add_words_internal -bn_add_words_internal: - .set reorder - li $3,-4 - and $1,$7,$3 - beqz $1,.L_bn_add_words_tail - -.L_bn_add_words_loop: - lw $12,0($5) - lw $8,0($6) - subu $7,4 - lw $13,4($5) - and $1,$7,$3 - lw $14,2*4($5) - addu $6,4*4 - lw $15,3*4($5) - addu $4,4*4 - lw $9,-3*4($6) - addu $5,4*4 - lw $10,-2*4($6) - lw $11,-4($6) - addu $8,$12 - sltu $24,$8,$12 - addu $12,$8,$2 - sltu $2,$12,$8 - sw $12,-4*4($4) - addu $2,$24 - - addu $9,$13 - sltu $25,$9,$13 - addu $13,$9,$2 - sltu $2,$13,$9 - sw $13,-3*4($4) - addu $2,$25 - - addu $10,$14 - sltu $24,$10,$14 - addu $14,$10,$2 - sltu $2,$14,$10 - sw $14,-2*4($4) - addu $2,$24 - - addu $11,$15 - sltu $25,$11,$15 - addu $15,$11,$2 - sltu $2,$15,$11 - sw $15,-4($4) - - .set noreorder - bgtz $1,.L_bn_add_words_loop - addu $2,$25 - - beqz $7,.L_bn_add_words_return - nop - -.L_bn_add_words_tail: - .set reorder - lw $12,0($5) - lw $8,0($6) - addu $8,$12 - subu $7,1 - sltu $24,$8,$12 - addu $12,$8,$2 - sltu $2,$12,$8 - sw $12,0($4) - addu $2,$24 - beqz $7,.L_bn_add_words_return - - lw $13,4($5) - lw $9,4($6) - addu $9,$13 - subu $7,1 - sltu $25,$9,$13 - addu $13,$9,$2 - sltu $2,$13,$9 - sw $13,4($4) - addu $2,$25 - beqz $7,.L_bn_add_words_return - - lw $14,2*4($5) - lw $10,2*4($6) - addu $10,$14 - sltu $24,$10,$14 - addu $14,$10,$2 - sltu $2,$14,$10 - sw $14,2*4($4) - addu $2,$24 - -.L_bn_add_words_return: - .set noreorder - jr $31 - move $4,$2 - -.end bn_add_words_internal - -.align 5 -.globl bn_sub_words -.ent bn_sub_words -bn_sub_words: - .set noreorder - bgtz $7,bn_sub_words_internal - move $2,$0 - jr $31 - move $4,$0 -.end bn_sub_words - -.align 5 -.ent bn_sub_words_internal -bn_sub_words_internal: - .set reorder - li $3,-4 - and $1,$7,$3 - beqz $1,.L_bn_sub_words_tail - -.L_bn_sub_words_loop: - lw $12,0($5) - lw $8,0($6) - subu $7,4 - lw $13,4($5) - and $1,$7,$3 - lw $14,2*4($5) - addu $6,4*4 - lw $15,3*4($5) - addu $4,4*4 - lw $9,-3*4($6) - addu $5,4*4 - lw $10,-2*4($6) - lw $11,-4($6) - sltu $24,$12,$8 - subu $8,$12,$8 - subu $12,$8,$2 - sgtu $2,$12,$8 - sw $12,-4*4($4) - addu $2,$24 - - sltu $25,$13,$9 - subu $9,$13,$9 - subu $13,$9,$2 - sgtu $2,$13,$9 - sw $13,-3*4($4) - addu $2,$25 - - - sltu $24,$14,$10 - subu $10,$14,$10 - subu $14,$10,$2 - sgtu $2,$14,$10 - sw $14,-2*4($4) - addu $2,$24 - - sltu $25,$15,$11 - subu $11,$15,$11 - subu $15,$11,$2 - sgtu $2,$15,$11 - sw $15,-4($4) - - .set noreorder - bgtz $1,.L_bn_sub_words_loop - addu $2,$25 - - beqz $7,.L_bn_sub_words_return - nop - -.L_bn_sub_words_tail: - .set reorder - lw $12,0($5) - lw $8,0($6) - subu $7,1 - sltu $24,$12,$8 - subu $8,$12,$8 - subu $12,$8,$2 - sgtu $2,$12,$8 - sw $12,0($4) - addu $2,$24 - beqz $7,.L_bn_sub_words_return - - lw $13,4($5) - subu $7,1 - lw $9,4($6) - sltu $25,$13,$9 - subu $9,$13,$9 - subu $13,$9,$2 - sgtu $2,$13,$9 - sw $13,4($4) - addu $2,$25 - beqz $7,.L_bn_sub_words_return - - lw $14,2*4($5) - lw $10,2*4($6) - sltu $24,$14,$10 - subu $10,$14,$10 - subu $14,$10,$2 - sgtu $2,$14,$10 - sw $14,2*4($4) - addu $2,$24 - -.L_bn_sub_words_return: - .set noreorder - jr $31 - move $4,$2 -.end bn_sub_words_internal - -.align 5 -.globl bn_div_3_words -.ent bn_div_3_words -bn_div_3_words: - .set noreorder - move $7,$4 # we know that bn_div_words does not - # touch $7, $10, $11 and preserves $6 - # so that we can save two arguments - # and return address in registers - # instead of stack:-) - - lw $4,($7) - move $10,$5 - bne $4,$6,bn_div_3_words_internal - lw $5,-4($7) - li $2,-1 - jr $31 - move $4,$2 -.end bn_div_3_words - -.align 5 -.ent bn_div_3_words_internal -bn_div_3_words_internal: - .set reorder - move $11,$31 - bal bn_div_words_internal - move $31,$11 - multu $10,$2 - lw $14,-2*4($7) - move $8,$0 - mfhi $13 - mflo $12 - sltu $24,$13,$5 -.L_bn_div_3_words_inner_loop: - bnez $24,.L_bn_div_3_words_inner_loop_done - sgeu $1,$14,$12 - seq $25,$13,$5 - and $1,$25 - sltu $15,$12,$10 - addu $5,$6 - subu $13,$15 - subu $12,$10 - sltu $24,$13,$5 - sltu $8,$5,$6 - or $24,$8 - .set noreorder - beqz $1,.L_bn_div_3_words_inner_loop - subu $2,1 - addu $2,1 - .set reorder -.L_bn_div_3_words_inner_loop_done: - .set noreorder - jr $31 - move $4,$2 -.end bn_div_3_words_internal - -.align 5 -.globl bn_div_words -.ent bn_div_words -bn_div_words: - .set noreorder - bnez $6,bn_div_words_internal - li $2,-1 # I would rather signal div-by-zero - # which can be done with 'break 7' - jr $31 - move $4,$2 -.end bn_div_words - -.align 5 -.ent bn_div_words_internal -bn_div_words_internal: - move $3,$0 - bltz $6,.L_bn_div_words_body - move $25,$3 - sll $6,1 - bgtz $6,.-4 - addu $25,1 - - .set reorder - negu $13,$25 - li $14,-1 - sll $14,$13 - and $14,$4 - srl $1,$5,$13 - .set noreorder - beqz $14,.+12 - nop - break 6 # signal overflow - .set reorder - sll $4,$25 - sll $5,$25 - or $4,$1 -.L_bn_div_words_body: - srl $3,$6,4*4 # bits - sgeu $1,$4,$6 - .set noreorder - beqz $1,.+12 - nop - subu $4,$6 - .set reorder - - li $8,-1 - srl $9,$4,4*4 # bits - srl $8,4*4 # q=0xffffffff - beq $3,$9,.L_bn_div_words_skip_div1 - divu $0,$4,$3 - mflo $8 -.L_bn_div_words_skip_div1: - multu $6,$8 - sll $15,$4,4*4 # bits - srl $1,$5,4*4 # bits - or $15,$1 - mflo $12 - mfhi $13 -.L_bn_div_words_inner_loop1: - sltu $14,$15,$12 - seq $24,$9,$13 - sltu $1,$9,$13 - and $14,$24 - sltu $2,$12,$6 - or $1,$14 - .set noreorder - beqz $1,.L_bn_div_words_inner_loop1_done - subu $13,$2 - subu $12,$6 - b .L_bn_div_words_inner_loop1 - subu $8,1 - .set reorder -.L_bn_div_words_inner_loop1_done: - - sll $5,4*4 # bits - subu $4,$15,$12 - sll $2,$8,4*4 # bits - - li $8,-1 - srl $9,$4,4*4 # bits - srl $8,4*4 # q=0xffffffff - beq $3,$9,.L_bn_div_words_skip_div2 - divu $0,$4,$3 - mflo $8 -.L_bn_div_words_skip_div2: - multu $6,$8 - sll $15,$4,4*4 # bits - srl $1,$5,4*4 # bits - or $15,$1 - mflo $12 - mfhi $13 -.L_bn_div_words_inner_loop2: - sltu $14,$15,$12 - seq $24,$9,$13 - sltu $1,$9,$13 - and $14,$24 - sltu $3,$12,$6 - or $1,$14 - .set noreorder - beqz $1,.L_bn_div_words_inner_loop2_done - subu $13,$3 - subu $12,$6 - b .L_bn_div_words_inner_loop2 - subu $8,1 - .set reorder -.L_bn_div_words_inner_loop2_done: - - subu $4,$15,$12 - or $2,$8 - srl $3,$4,$25 # $3 contains remainder if anybody wants it - srl $6,$25 # restore $6 - - .set noreorder - move $5,$3 - jr $31 - move $4,$2 -.end bn_div_words_internal - -.align 5 -.globl bn_mul_comba8 -.ent bn_mul_comba8 -bn_mul_comba8: - .set noreorder - .frame $29,6*4,$31 - .mask 0x003f0000,-4 - subu $29,6*4 - sw $21,5*4($29) - sw $20,4*4($29) - sw $19,3*4($29) - sw $18,2*4($29) - sw $17,1*4($29) - sw $16,0*4($29) - - .set reorder - lw $12,0($5) # If compiled with -mips3 option on - # R5000 box assembler barks on this - # 1ine with "should not have mult/div - # as last instruction in bb (R10K - # bug)" warning. If anybody out there - # has a clue about how to circumvent - # this do send me a note. - # <appro@fy.chalmers.se> - - lw $8,0($6) - lw $13,4($5) - lw $14,2*4($5) - multu $12,$8 # mul_add_c(a[0],b[0],c1,c2,c3); - lw $15,3*4($5) - lw $9,4($6) - lw $10,2*4($6) - lw $11,3*4($6) - mflo $2 - mfhi $3 - - lw $16,4*4($5) - lw $18,5*4($5) - multu $12,$9 # mul_add_c(a[0],b[1],c2,c3,c1); - lw $20,6*4($5) - lw $5,7*4($5) - lw $17,4*4($6) - lw $19,5*4($6) - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $13,$8 # mul_add_c(a[1],b[0],c2,c3,c1); - addu $7,$25,$1 - lw $21,6*4($6) - lw $6,7*4($6) - sw $2,0($4) # r[0]=c1; - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $14,$8 # mul_add_c(a[2],b[0],c3,c1,c2); - addu $25,$1 - addu $7,$25 - sltu $2,$7,$25 - sw $3,4($4) # r[1]=c2; - - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $13,$9 # mul_add_c(a[1],b[1],c3,c1,c2); - addu $25,$1 - addu $2,$25 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $12,$10 # mul_add_c(a[0],b[2],c3,c1,c2); - addu $25,$1 - addu $2,$25 - sltu $3,$2,$25 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $12,$11 # mul_add_c(a[0],b[3],c1,c2,c3); - addu $25,$1 - addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 - sw $7,2*4($4) # r[2]=c3; - - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $13,$10 # mul_add_c(a[1],b[2],c1,c2,c3); - addu $25,$1 - addu $3,$25 - sltu $7,$3,$25 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $14,$9 # mul_add_c(a[2],b[1],c1,c2,c3); - addu $25,$1 - addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $15,$8 # mul_add_c(a[3],b[0],c1,c2,c3); - addu $25,$1 - addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $16,$8 # mul_add_c(a[4],b[0],c2,c3,c1); - addu $25,$1 - addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 - sw $2,3*4($4) # r[3]=c1; - - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $15,$9 # mul_add_c(a[3],b[1],c2,c3,c1); - addu $25,$1 - addu $7,$25 - sltu $2,$7,$25 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $14,$10 # mul_add_c(a[2],b[2],c2,c3,c1); - addu $25,$1 - addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $13,$11 # mul_add_c(a[1],b[3],c2,c3,c1); - addu $25,$1 - addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $12,$17 # mul_add_c(a[0],b[4],c2,c3,c1); - addu $25,$1 - addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $12,$19 # mul_add_c(a[0],b[5],c3,c1,c2); - addu $25,$1 - addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 - sw $3,4*4($4) # r[4]=c2; - - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $13,$17 # mul_add_c(a[1],b[4],c3,c1,c2); - addu $25,$1 - addu $2,$25 - sltu $3,$2,$25 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $14,$11 # mul_add_c(a[2],b[3],c3,c1,c2); - addu $25,$1 - addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $15,$10 # mul_add_c(a[3],b[2],c3,c1,c2); - addu $25,$1 - addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $16,$9 # mul_add_c(a[4],b[1],c3,c1,c2); - addu $25,$1 - addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $18,$8 # mul_add_c(a[5],b[0],c3,c1,c2); - addu $25,$1 - addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $20,$8 # mul_add_c(a[6],b[0],c1,c2,c3); - addu $25,$1 - addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 - sw $7,5*4($4) # r[5]=c3; - - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $18,$9 # mul_add_c(a[5],b[1],c1,c2,c3); - addu $25,$1 - addu $3,$25 - sltu $7,$3,$25 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $16,$10 # mul_add_c(a[4],b[2],c1,c2,c3); - addu $25,$1 - addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $15,$11 # mul_add_c(a[3],b[3],c1,c2,c3); - addu $25,$1 - addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $14,$17 # mul_add_c(a[2],b[4],c1,c2,c3); - addu $25,$1 - addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $13,$19 # mul_add_c(a[1],b[5],c1,c2,c3); - addu $25,$1 - addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $12,$21 # mul_add_c(a[0],b[6],c1,c2,c3); - addu $25,$1 - addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $12,$6 # mul_add_c(a[0],b[7],c2,c3,c1); - addu $25,$1 - addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 - sw $2,6*4($4) # r[6]=c1; - - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $13,$21 # mul_add_c(a[1],b[6],c2,c3,c1); - addu $25,$1 - addu $7,$25 - sltu $2,$7,$25 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $14,$19 # mul_add_c(a[2],b[5],c2,c3,c1); - addu $25,$1 - addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $15,$17 # mul_add_c(a[3],b[4],c2,c3,c1); - addu $25,$1 - addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $16,$11 # mul_add_c(a[4],b[3],c2,c3,c1); - addu $25,$1 - addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $18,$10 # mul_add_c(a[5],b[2],c2,c3,c1); - addu $25,$1 - addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $20,$9 # mul_add_c(a[6],b[1],c2,c3,c1); - addu $25,$1 - addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $5,$8 # mul_add_c(a[7],b[0],c2,c3,c1); - addu $25,$1 - addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $5,$9 # mul_add_c(a[7],b[1],c3,c1,c2); - addu $25,$1 - addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 - sw $3,7*4($4) # r[7]=c2; - - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $20,$10 # mul_add_c(a[6],b[2],c3,c1,c2); - addu $25,$1 - addu $2,$25 - sltu $3,$2,$25 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $18,$11 # mul_add_c(a[5],b[3],c3,c1,c2); - addu $25,$1 - addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $16,$17 # mul_add_c(a[4],b[4],c3,c1,c2); - addu $25,$1 - addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $15,$19 # mul_add_c(a[3],b[5],c3,c1,c2); - addu $25,$1 - addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $14,$21 # mul_add_c(a[2],b[6],c3,c1,c2); - addu $25,$1 - addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $13,$6 # mul_add_c(a[1],b[7],c3,c1,c2); - addu $25,$1 - addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $14,$6 # mul_add_c(a[2],b[7],c1,c2,c3); - addu $25,$1 - addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 - sw $7,8*4($4) # r[8]=c3; - - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $15,$21 # mul_add_c(a[3],b[6],c1,c2,c3); - addu $25,$1 - addu $3,$25 - sltu $7,$3,$25 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $16,$19 # mul_add_c(a[4],b[5],c1,c2,c3); - addu $25,$1 - addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $18,$17 # mul_add_c(a[5],b[4],c1,c2,c3); - addu $25,$1 - addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $20,$11 # mul_add_c(a[6],b[3],c1,c2,c3); - addu $25,$1 - addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $5,$10 # mul_add_c(a[7],b[2],c1,c2,c3); - addu $25,$1 - addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $5,$11 # mul_add_c(a[7],b[3],c2,c3,c1); - addu $25,$1 - addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 - sw $2,9*4($4) # r[9]=c1; - - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $20,$17 # mul_add_c(a[6],b[4],c2,c3,c1); - addu $25,$1 - addu $7,$25 - sltu $2,$7,$25 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $18,$19 # mul_add_c(a[5],b[5],c2,c3,c1); - addu $25,$1 - addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $16,$21 # mul_add_c(a[4],b[6],c2,c3,c1); - addu $25,$1 - addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $15,$6 # mul_add_c(a[3],b[7],c2,c3,c1); - addu $25,$1 - addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $16,$6 # mul_add_c(a[4],b[7],c3,c1,c2); - addu $25,$1 - addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 - sw $3,10*4($4) # r[10]=c2; - - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $18,$21 # mul_add_c(a[5],b[6],c3,c1,c2); - addu $25,$1 - addu $2,$25 - sltu $3,$2,$25 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $20,$19 # mul_add_c(a[6],b[5],c3,c1,c2); - addu $25,$1 - addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $5,$17 # mul_add_c(a[7],b[4],c3,c1,c2); - addu $25,$1 - addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $5,$19 # mul_add_c(a[7],b[5],c1,c2,c3); - addu $25,$1 - addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 - sw $7,11*4($4) # r[11]=c3; - - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $20,$21 # mul_add_c(a[6],b[6],c1,c2,c3); - addu $25,$1 - addu $3,$25 - sltu $7,$3,$25 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $18,$6 # mul_add_c(a[5],b[7],c1,c2,c3); - addu $25,$1 - addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $20,$6 # mul_add_c(a[6],b[7],c2,c3,c1); - addu $25,$1 - addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 - sw $2,12*4($4) # r[12]=c1; - - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $5,$21 # mul_add_c(a[7],b[6],c2,c3,c1); - addu $25,$1 - addu $7,$25 - sltu $2,$7,$25 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $5,$6 # mul_add_c(a[7],b[7],c3,c1,c2); - addu $25,$1 - addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 - sw $3,13*4($4) # r[13]=c2; - - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - addu $25,$1 - addu $2,$25 - sw $7,14*4($4) # r[14]=c3; - sw $2,15*4($4) # r[15]=c1; - - .set noreorder - lw $21,5*4($29) - lw $20,4*4($29) - lw $19,3*4($29) - lw $18,2*4($29) - lw $17,1*4($29) - lw $16,0*4($29) - jr $31 - addu $29,6*4 -.end bn_mul_comba8 - -.align 5 -.globl bn_mul_comba4 -.ent bn_mul_comba4 -bn_mul_comba4: - .set reorder - lw $12,0($5) - lw $8,0($6) - lw $13,4($5) - lw $14,2*4($5) - multu $12,$8 # mul_add_c(a[0],b[0],c1,c2,c3); - lw $15,3*4($5) - lw $9,4($6) - lw $10,2*4($6) - lw $11,3*4($6) - mflo $2 - mfhi $3 - sw $2,0($4) - - multu $12,$9 # mul_add_c(a[0],b[1],c2,c3,c1); - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $13,$8 # mul_add_c(a[1],b[0],c2,c3,c1); - addu $7,$25,$1 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $14,$8 # mul_add_c(a[2],b[0],c3,c1,c2); - addu $25,$1 - addu $7,$25 - sltu $2,$7,$25 - sw $3,4($4) - - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $13,$9 # mul_add_c(a[1],b[1],c3,c1,c2); - addu $25,$1 - addu $2,$25 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $12,$10 # mul_add_c(a[0],b[2],c3,c1,c2); - addu $25,$1 - addu $2,$25 - sltu $3,$2,$25 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $12,$11 # mul_add_c(a[0],b[3],c1,c2,c3); - addu $25,$1 - addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 - sw $7,2*4($4) - - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $13,$10 # mul_add_c(a[1],b[2],c1,c2,c3); - addu $25,$1 - addu $3,$25 - sltu $7,$3,$25 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $14,$9 # mul_add_c(a[2],b[1],c1,c2,c3); - addu $25,$1 - addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $15,$8 # mul_add_c(a[3],b[0],c1,c2,c3); - addu $25,$1 - addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $15,$9 # mul_add_c(a[3],b[1],c2,c3,c1); - addu $25,$1 - addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 - sw $2,3*4($4) - - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $14,$10 # mul_add_c(a[2],b[2],c2,c3,c1); - addu $25,$1 - addu $7,$25 - sltu $2,$7,$25 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $13,$11 # mul_add_c(a[1],b[3],c2,c3,c1); - addu $25,$1 - addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $14,$11 # mul_add_c(a[2],b[3],c3,c1,c2); - addu $25,$1 - addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 - sw $3,4*4($4) - - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $15,$10 # mul_add_c(a[3],b[2],c3,c1,c2); - addu $25,$1 - addu $2,$25 - sltu $3,$2,$25 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $15,$11 # mul_add_c(a[3],b[3],c1,c2,c3); - addu $25,$1 - addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 - sw $7,5*4($4) - - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - addu $25,$1 - addu $3,$25 - sw $2,6*4($4) - sw $3,7*4($4) - - .set noreorder - jr $31 - nop -.end bn_mul_comba4 - -.align 5 -.globl bn_sqr_comba8 -.ent bn_sqr_comba8 -bn_sqr_comba8: - .set reorder - lw $12,0($5) - lw $13,4($5) - lw $14,2*4($5) - lw $15,3*4($5) - - multu $12,$12 # mul_add_c(a[0],b[0],c1,c2,c3); - lw $8,4*4($5) - lw $9,5*4($5) - lw $10,6*4($5) - lw $11,7*4($5) - mflo $2 - mfhi $3 - sw $2,0($4) - - multu $12,$13 # mul_add_c2(a[0],b[1],c2,c3,c1); - mflo $24 - mfhi $25 - slt $2,$25,$0 - sll $25,1 - multu $14,$12 # mul_add_c2(a[2],b[0],c3,c1,c2); - slt $6,$24,$0 - addu $25,$6 - sll $24,1 - addu $3,$24 - sltu $1,$3,$24 - addu $7,$25,$1 - sw $3,4($4) - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $13,$13 # forward multiplication - addu $7,$24 - addu $1,$25 - sltu $24,$7,$24 - addu $2,$1 - addu $25,$24 - sltu $3,$2,$1 - addu $2,$25 - sltu $25,$2,$25 - addu $3,$25 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $12,$15 # mul_add_c2(a[0],b[3],c1,c2,c3); - addu $25,$1 - addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 - sw $7,2*4($4) - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $13,$14 # forward multiplication - addu $2,$24 - addu $1,$25 - sltu $24,$2,$24 - addu $3,$1 - addu $25,$24 - sltu $7,$3,$1 - addu $3,$25 - sltu $25,$3,$25 - addu $7,$25 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $8,$12 # forward multiplication - addu $2,$24 - addu $1,$25 - sltu $24,$2,$24 - addu $3,$1 - addu $25,$24 - sltu $1,$3,$1 - addu $3,$25 - addu $7,$1 - sltu $25,$3,$25 - addu $7,$25 - sw $2,3*4($4) - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $15,$13 # forward multiplication - addu $3,$24 - addu $1,$25 - sltu $24,$3,$24 - addu $7,$1 - addu $25,$24 - sltu $2,$7,$1 - addu $7,$25 - sltu $25,$7,$25 - addu $2,$25 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $14,$14 # forward multiplication - addu $3,$24 - addu $1,$25 - sltu $24,$3,$24 - addu $7,$1 - addu $25,$24 - sltu $1,$7,$1 - addu $7,$25 - addu $2,$1 - sltu $25,$7,$25 - addu $2,$25 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $12,$9 # mul_add_c2(a[0],b[5],c3,c1,c2); - addu $25,$1 - addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 - sw $3,4*4($4) - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $13,$8 # forward multiplication - addu $7,$24 - addu $1,$25 - sltu $24,$7,$24 - addu $2,$1 - addu $25,$24 - sltu $3,$2,$1 - addu $2,$25 - sltu $25,$2,$25 - addu $3,$25 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $14,$15 # forward multiplication - addu $7,$24 - addu $1,$25 - sltu $24,$7,$24 - addu $2,$1 - addu $25,$24 - sltu $1,$2,$1 - addu $2,$25 - addu $3,$1 - sltu $25,$2,$25 - addu $3,$25 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $10,$12 # forward multiplication - addu $7,$24 - addu $1,$25 - sltu $24,$7,$24 - addu $2,$1 - addu $25,$24 - sltu $1,$2,$1 - addu $2,$25 - addu $3,$1 - sltu $25,$2,$25 - addu $3,$25 - sw $7,5*4($4) - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $9,$13 # forward multiplication - addu $2,$24 - addu $1,$25 - sltu $24,$2,$24 - addu $3,$1 - addu $25,$24 - sltu $7,$3,$1 - addu $3,$25 - sltu $25,$3,$25 - addu $7,$25 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $8,$14 # forward multiplication - addu $2,$24 - addu $1,$25 - sltu $24,$2,$24 - addu $3,$1 - addu $25,$24 - sltu $1,$3,$1 - addu $3,$25 - addu $7,$1 - sltu $25,$3,$25 - addu $7,$25 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $15,$15 # forward multiplication - addu $2,$24 - addu $1,$25 - sltu $24,$2,$24 - addu $3,$1 - addu $25,$24 - sltu $1,$3,$1 - addu $3,$25 - addu $7,$1 - sltu $25,$3,$25 - addu $7,$25 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $12,$11 # mul_add_c2(a[0],b[7],c2,c3,c1); - addu $25,$1 - addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 - sw $2,6*4($4) - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $13,$10 # forward multiplication - addu $3,$24 - addu $1,$25 - sltu $24,$3,$24 - addu $7,$1 - addu $25,$24 - sltu $2,$7,$1 - addu $7,$25 - sltu $25,$7,$25 - addu $2,$25 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $14,$9 # forward multiplication - addu $3,$24 - addu $1,$25 - sltu $24,$3,$24 - addu $7,$1 - addu $25,$24 - sltu $1,$7,$1 - addu $7,$25 - addu $2,$1 - sltu $25,$7,$25 - addu $2,$25 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $15,$8 # forward multiplication - addu $3,$24 - addu $1,$25 - sltu $24,$3,$24 - addu $7,$1 - addu $25,$24 - sltu $1,$7,$1 - addu $7,$25 - addu $2,$1 - sltu $25,$7,$25 - addu $2,$25 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $11,$13 # forward multiplication - addu $3,$24 - addu $1,$25 - sltu $24,$3,$24 - addu $7,$1 - addu $25,$24 - sltu $1,$7,$1 - addu $7,$25 - addu $2,$1 - sltu $25,$7,$25 - addu $2,$25 - sw $3,7*4($4) - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $10,$14 # forward multiplication - addu $7,$24 - addu $1,$25 - sltu $24,$7,$24 - addu $2,$1 - addu $25,$24 - sltu $3,$2,$1 - addu $2,$25 - sltu $25,$2,$25 - addu $3,$25 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $9,$15 # forward multiplication - addu $7,$24 - addu $1,$25 - sltu $24,$7,$24 - addu $2,$1 - addu $25,$24 - sltu $1,$2,$1 - addu $2,$25 - addu $3,$1 - sltu $25,$2,$25 - addu $3,$25 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $8,$8 # forward multiplication - addu $7,$24 - addu $1,$25 - sltu $24,$7,$24 - addu $2,$1 - addu $25,$24 - sltu $1,$2,$1 - addu $2,$25 - addu $3,$1 - sltu $25,$2,$25 - addu $3,$25 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $14,$11 # mul_add_c2(a[2],b[7],c1,c2,c3); - addu $25,$1 - addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 - sw $7,8*4($4) - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $15,$10 # forward multiplication - addu $2,$24 - addu $1,$25 - sltu $24,$2,$24 - addu $3,$1 - addu $25,$24 - sltu $7,$3,$1 - addu $3,$25 - sltu $25,$3,$25 - addu $7,$25 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $8,$9 # forward multiplication - addu $2,$24 - addu $1,$25 - sltu $24,$2,$24 - addu $3,$1 - addu $25,$24 - sltu $1,$3,$1 - addu $3,$25 - addu $7,$1 - sltu $25,$3,$25 - addu $7,$25 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $11,$15 # forward multiplication - addu $2,$24 - addu $1,$25 - sltu $24,$2,$24 - addu $3,$1 - addu $25,$24 - sltu $1,$3,$1 - addu $3,$25 - addu $7,$1 - sltu $25,$3,$25 - addu $7,$25 - sw $2,9*4($4) - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $10,$8 # forward multiplication - addu $3,$24 - addu $1,$25 - sltu $24,$3,$24 - addu $7,$1 - addu $25,$24 - sltu $2,$7,$1 - addu $7,$25 - sltu $25,$7,$25 - addu $2,$25 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $9,$9 # forward multiplication - addu $3,$24 - addu $1,$25 - sltu $24,$3,$24 - addu $7,$1 - addu $25,$24 - sltu $1,$7,$1 - addu $7,$25 - addu $2,$1 - sltu $25,$7,$25 - addu $2,$25 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $8,$11 # mul_add_c2(a[4],b[7],c3,c1,c2); - addu $25,$1 - addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 - sw $3,10*4($4) - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $9,$10 # forward multiplication - addu $7,$24 - addu $1,$25 - sltu $24,$7,$24 - addu $2,$1 - addu $25,$24 - sltu $3,$2,$1 - addu $2,$25 - sltu $25,$2,$25 - addu $3,$25 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $11,$9 # forward multiplication - addu $7,$24 - addu $1,$25 - sltu $24,$7,$24 - addu $2,$1 - addu $25,$24 - sltu $1,$2,$1 - addu $2,$25 - addu $3,$1 - sltu $25,$2,$25 - addu $3,$25 - sw $7,11*4($4) - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $10,$10 # forward multiplication - addu $2,$24 - addu $1,$25 - sltu $24,$2,$24 - addu $3,$1 - addu $25,$24 - sltu $7,$3,$1 - addu $3,$25 - sltu $25,$3,$25 - addu $7,$25 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $10,$11 # mul_add_c2(a[6],b[7],c2,c3,c1); - addu $25,$1 - addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 - sw $2,12*4($4) - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $11,$11 # forward multiplication - addu $3,$24 - addu $1,$25 - sltu $24,$3,$24 - addu $7,$1 - addu $25,$24 - sltu $2,$7,$1 - addu $7,$25 - sltu $25,$7,$25 - addu $2,$25 - sw $3,13*4($4) - - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - addu $25,$1 - addu $2,$25 - sw $7,14*4($4) - sw $2,15*4($4) - - .set noreorder - jr $31 - nop -.end bn_sqr_comba8 - -.align 5 -.globl bn_sqr_comba4 -.ent bn_sqr_comba4 -bn_sqr_comba4: - .set reorder - lw $12,0($5) - lw $13,4($5) - multu $12,$12 # mul_add_c(a[0],b[0],c1,c2,c3); - lw $14,2*4($5) - lw $15,3*4($5) - mflo $2 - mfhi $3 - sw $2,0($4) - - multu $12,$13 # mul_add_c2(a[0],b[1],c2,c3,c1); - mflo $24 - mfhi $25 - slt $2,$25,$0 - sll $25,1 - multu $14,$12 # mul_add_c2(a[2],b[0],c3,c1,c2); - slt $6,$24,$0 - addu $25,$6 - sll $24,1 - addu $3,$24 - sltu $1,$3,$24 - addu $7,$25,$1 - sw $3,4($4) - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $13,$13 # forward multiplication - addu $7,$24 - addu $1,$25 - sltu $24,$7,$24 - addu $2,$1 - addu $25,$24 - sltu $3,$2,$1 - addu $2,$25 - sltu $25,$2,$25 - addu $3,$25 - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $12,$15 # mul_add_c2(a[0],b[3],c1,c2,c3); - addu $25,$1 - addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 - sw $7,2*4($4) - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $13,$14 # forward multiplication - addu $2,$24 - addu $1,$25 - sltu $24,$2,$24 - addu $3,$1 - addu $25,$24 - sltu $7,$3,$1 - addu $3,$25 - sltu $25,$3,$25 - addu $7,$25 - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - multu $15,$13 # forward multiplication - addu $2,$24 - addu $1,$25 - sltu $24,$2,$24 - addu $3,$1 - addu $25,$24 - sltu $1,$3,$1 - addu $3,$25 - addu $7,$1 - sltu $25,$3,$25 - addu $7,$25 - sw $2,3*4($4) - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $14,$14 # forward multiplication - addu $3,$24 - addu $1,$25 - sltu $24,$3,$24 - addu $7,$1 - addu $25,$24 - sltu $2,$7,$1 - addu $7,$25 - sltu $25,$7,$25 - addu $2,$25 - mflo $24 - mfhi $25 - addu $3,$24 - sltu $1,$3,$24 - multu $14,$15 # mul_add_c2(a[2],b[3],c3,c1,c2); - addu $25,$1 - addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 - sw $3,4*4($4) - mflo $24 - mfhi $25 - addu $7,$24 - sltu $1,$7,$24 - multu $15,$15 # forward multiplication - addu $7,$24 - addu $1,$25 - sltu $24,$7,$24 - addu $2,$1 - addu $25,$24 - sltu $3,$2,$1 - addu $2,$25 - sltu $25,$2,$25 - addu $3,$25 - sw $7,5*4($4) - - mflo $24 - mfhi $25 - addu $2,$24 - sltu $1,$2,$24 - addu $25,$1 - addu $3,$25 - sw $2,6*4($4) - sw $3,7*4($4) - - .set noreorder - jr $31 - nop -.end bn_sqr_comba4 |