diff options
Diffstat (limited to 'main/openssl/crypto')
46 files changed, 906 insertions, 3238 deletions
diff --git a/main/openssl/crypto/LPdir_win.c b/main/openssl/crypto/LPdir_win.c index d5b5e2c9..7eea373c 100644 --- a/main/openssl/crypto/LPdir_win.c +++ b/main/openssl/crypto/LPdir_win.c @@ -36,7 +36,7 @@ #if defined(LP_SYS_WINCE) && !defined(FindFirstFile) # define FindFirstFile FindFirstFileW #endif -#if defined(LP_SYS_WINCE) && !defined(FindFirstFile) +#if defined(LP_SYS_WINCE) && !defined(FindNextFile) # define FindNextFile FindNextFileW #endif diff --git a/main/openssl/crypto/aes/asm/aes-mips.pl b/main/openssl/crypto/aes/asm/aes-mips.pl index e5239542..537c8d31 100644 --- a/main/openssl/crypto/aes/asm/aes-mips.pl +++ b/main/openssl/crypto/aes/asm/aes-mips.pl @@ -70,7 +70,7 @@ $pf = ($flavour =~ /nubi/i) ? $t0 : $t2; # ###################################################################### -$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0; +$big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC}); for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); } open STDOUT,">$output"; diff --git a/main/openssl/crypto/armcap.c b/main/openssl/crypto/armcap.c index 7e46d07a..59b74948 100644 --- a/main/openssl/crypto/armcap.c +++ b/main/openssl/crypto/armcap.c @@ -68,6 +68,14 @@ static unsigned long (*getauxval)(unsigned long) = NULL; # define HWCAP_CE_SHA256 (1 << 6) #endif +#ifdef ANDROID +// Works around a bug where Android versions up to and including L don't +// properly restore the signal mask when asked. +#define sigsetjmp(env,savesigs) \ + (sigprocmask(SIG_SETMASK,&ill_act.sa_mask,NULL), \ + sigsetjmp(env,savesigs)) +#endif + void OPENSSL_cpuid_setup(void) { char *e; diff --git a/main/openssl/crypto/asn1/a_bitstr.c b/main/openssl/crypto/asn1/a_bitstr.c index 34179960..4117a67d 100644 --- a/main/openssl/crypto/asn1/a_bitstr.c +++ b/main/openssl/crypto/asn1/a_bitstr.c @@ -136,11 +136,16 @@ ASN1_BIT_STRING *c2i_ASN1_BIT_STRING(ASN1_BIT_STRING **a, p= *pp; i= *(p++); + if (i > 7) + { + i=ASN1_R_INVALID_BIT_STRING_BITS_LEFT; + goto err; + } /* We do this to preserve the settings. If we modify * the settings, via the _set_bit function, we will recalculate * on output */ ret->flags&= ~(ASN1_STRING_FLAG_BITS_LEFT|0x07); /* clear */ - ret->flags|=(ASN1_STRING_FLAG_BITS_LEFT|(i&0x07)); /* set */ + ret->flags|=(ASN1_STRING_FLAG_BITS_LEFT|i); /* set */ if (len-- > 1) /* using one because of the bits left byte */ { diff --git a/main/openssl/crypto/asn1/a_type.c b/main/openssl/crypto/asn1/a_type.c index a45d2f9d..5e1bc762 100644 --- a/main/openssl/crypto/asn1/a_type.c +++ b/main/openssl/crypto/asn1/a_type.c @@ -113,7 +113,7 @@ IMPLEMENT_STACK_OF(ASN1_TYPE) IMPLEMENT_ASN1_SET_OF(ASN1_TYPE) /* Returns 0 if they are equal, != 0 otherwise. */ -int ASN1_TYPE_cmp(ASN1_TYPE *a, ASN1_TYPE *b) +int ASN1_TYPE_cmp(const ASN1_TYPE *a, const ASN1_TYPE *b) { int result = -1; diff --git a/main/openssl/crypto/asn1/a_verify.c b/main/openssl/crypto/asn1/a_verify.c index fc84cd3d..78dde1d0 100644 --- a/main/openssl/crypto/asn1/a_verify.c +++ b/main/openssl/crypto/asn1/a_verify.c @@ -90,6 +90,12 @@ int ASN1_verify(i2d_of_void *i2d, X509_ALGOR *a, ASN1_BIT_STRING *signature, ASN1err(ASN1_F_ASN1_VERIFY,ASN1_R_UNKNOWN_MESSAGE_DIGEST_ALGORITHM); goto err; } + + if (signature->type == V_ASN1_BIT_STRING && signature->flags & 0x7) + { + ASN1err(ASN1_F_ASN1_VERIFY, ASN1_R_INVALID_BIT_STRING_BITS_LEFT); + goto err; + } inl=i2d(data,NULL); buf_in=OPENSSL_malloc((unsigned int)inl); @@ -146,6 +152,12 @@ int ASN1_item_verify(const ASN1_ITEM *it, X509_ALGOR *a, return -1; } + if (signature->type == V_ASN1_BIT_STRING && signature->flags & 0x7) + { + ASN1err(ASN1_F_ASN1_ITEM_VERIFY, ASN1_R_INVALID_BIT_STRING_BITS_LEFT); + return -1; + } + EVP_MD_CTX_init(&ctx); /* Convert signature OID into digest and public key OIDs */ diff --git a/main/openssl/crypto/asn1/asn1.h b/main/openssl/crypto/asn1/asn1.h index 220a0c8c..3c45d5d0 100644 --- a/main/openssl/crypto/asn1/asn1.h +++ b/main/openssl/crypto/asn1/asn1.h @@ -776,7 +776,7 @@ DECLARE_ASN1_FUNCTIONS_fname(ASN1_TYPE, ASN1_ANY, ASN1_TYPE) int ASN1_TYPE_get(ASN1_TYPE *a); void ASN1_TYPE_set(ASN1_TYPE *a, int type, void *value); int ASN1_TYPE_set1(ASN1_TYPE *a, int type, const void *value); -int ASN1_TYPE_cmp(ASN1_TYPE *a, ASN1_TYPE *b); +int ASN1_TYPE_cmp(const ASN1_TYPE *a, const ASN1_TYPE *b); ASN1_OBJECT * ASN1_OBJECT_new(void ); void ASN1_OBJECT_free(ASN1_OBJECT *a); @@ -1329,6 +1329,7 @@ void ERR_load_ASN1_strings(void); #define ASN1_R_ILLEGAL_TIME_VALUE 184 #define ASN1_R_INTEGER_NOT_ASCII_FORMAT 185 #define ASN1_R_INTEGER_TOO_LARGE_FOR_LONG 128 +#define ASN1_R_INVALID_BIT_STRING_BITS_LEFT 220 #define ASN1_R_INVALID_BMPSTRING_LENGTH 129 #define ASN1_R_INVALID_DIGIT 130 #define ASN1_R_INVALID_MIME_TYPE 205 @@ -1378,6 +1379,7 @@ void ERR_load_ASN1_strings(void); #define ASN1_R_TIME_NOT_ASCII_FORMAT 193 #define ASN1_R_TOO_LONG 155 #define ASN1_R_TYPE_NOT_CONSTRUCTED 156 +#define ASN1_R_TYPE_NOT_PRIMITIVE 218 #define ASN1_R_UNABLE_TO_DECODE_RSA_KEY 157 #define ASN1_R_UNABLE_TO_DECODE_RSA_PRIVATE_KEY 158 #define ASN1_R_UNEXPECTED_EOC 159 diff --git a/main/openssl/crypto/asn1/asn1_err.c b/main/openssl/crypto/asn1/asn1_err.c index aa60203b..568a8416 100644 --- a/main/openssl/crypto/asn1/asn1_err.c +++ b/main/openssl/crypto/asn1/asn1_err.c @@ -1,6 +1,6 @@ /* crypto/asn1/asn1_err.c */ /* ==================================================================== - * Copyright (c) 1999-2011 The OpenSSL Project. All rights reserved. + * Copyright (c) 1999-2014 The OpenSSL Project. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -246,6 +246,7 @@ static ERR_STRING_DATA ASN1_str_reasons[]= {ERR_REASON(ASN1_R_ILLEGAL_TIME_VALUE) ,"illegal time value"}, {ERR_REASON(ASN1_R_INTEGER_NOT_ASCII_FORMAT),"integer not ascii format"}, {ERR_REASON(ASN1_R_INTEGER_TOO_LARGE_FOR_LONG),"integer too large for long"}, +{ERR_REASON(ASN1_R_INVALID_BIT_STRING_BITS_LEFT),"invalid bit string bits left"}, {ERR_REASON(ASN1_R_INVALID_BMPSTRING_LENGTH),"invalid bmpstring length"}, {ERR_REASON(ASN1_R_INVALID_DIGIT) ,"invalid digit"}, {ERR_REASON(ASN1_R_INVALID_MIME_TYPE) ,"invalid mime type"}, @@ -295,6 +296,7 @@ static ERR_STRING_DATA ASN1_str_reasons[]= {ERR_REASON(ASN1_R_TIME_NOT_ASCII_FORMAT),"time not ascii format"}, {ERR_REASON(ASN1_R_TOO_LONG) ,"too long"}, {ERR_REASON(ASN1_R_TYPE_NOT_CONSTRUCTED) ,"type not constructed"}, +{ERR_REASON(ASN1_R_TYPE_NOT_PRIMITIVE) ,"type not primitive"}, {ERR_REASON(ASN1_R_UNABLE_TO_DECODE_RSA_KEY),"unable to decode rsa key"}, {ERR_REASON(ASN1_R_UNABLE_TO_DECODE_RSA_PRIVATE_KEY),"unable to decode rsa private key"}, {ERR_REASON(ASN1_R_UNEXPECTED_EOC) ,"unexpected eoc"}, diff --git a/main/openssl/crypto/asn1/tasn_dec.c b/main/openssl/crypto/asn1/tasn_dec.c index 87d7dfdf..2cbfa814 100644 --- a/main/openssl/crypto/asn1/tasn_dec.c +++ b/main/openssl/crypto/asn1/tasn_dec.c @@ -870,6 +870,14 @@ static int asn1_d2i_ex_primitive(ASN1_VALUE **pval, } else if (cst) { + if (utype == V_ASN1_NULL || utype == V_ASN1_BOOLEAN + || utype == V_ASN1_OBJECT || utype == V_ASN1_INTEGER + || utype == V_ASN1_ENUMERATED) + { + ASN1err(ASN1_F_ASN1_D2I_EX_PRIMITIVE, + ASN1_R_TYPE_NOT_PRIMITIVE); + return 0; + } buf.length = 0; buf.max = 0; buf.data = NULL; diff --git a/main/openssl/crypto/asn1/x_algor.c b/main/openssl/crypto/asn1/x_algor.c index 274e456c..57cc956c 100644 --- a/main/openssl/crypto/asn1/x_algor.c +++ b/main/openssl/crypto/asn1/x_algor.c @@ -142,3 +142,14 @@ void X509_ALGOR_set_md(X509_ALGOR *alg, const EVP_MD *md) X509_ALGOR_set0(alg, OBJ_nid2obj(EVP_MD_type(md)), param_type, NULL); } + +int X509_ALGOR_cmp(const X509_ALGOR *a, const X509_ALGOR *b) + { + int rv; + rv = OBJ_cmp(a->algorithm, b->algorithm); + if (rv) + return rv; + if (!a->parameter && !b->parameter) + return 0; + return ASN1_TYPE_cmp(a->parameter, b->parameter); + } diff --git a/main/openssl/crypto/asn1/x_name.c b/main/openssl/crypto/asn1/x_name.c index d7c23186..22da5704 100644 --- a/main/openssl/crypto/asn1/x_name.c +++ b/main/openssl/crypto/asn1/x_name.c @@ -350,6 +350,8 @@ static int x509_name_canon(X509_NAME *a) set = entry->set; } tmpentry = X509_NAME_ENTRY_new(); + if (!tmpentry) + goto err; tmpentry->object = OBJ_dup(entry->object); if (!asn1_string_canon(tmpentry->value, entry->value)) goto err; diff --git a/main/openssl/crypto/bio/bio.h b/main/openssl/crypto/bio/bio.h index d05fa22a..3ff67275 100644 --- a/main/openssl/crypto/bio/bio.h +++ b/main/openssl/crypto/bio/bio.h @@ -175,6 +175,8 @@ extern "C" { #define BIO_CTRL_DGRAM_SET_NEXT_TIMEOUT 45 /* Next DTLS handshake timeout to * adjust socket timeouts */ +#define BIO_CTRL_DGRAM_GET_MTU_OVERHEAD 49 + #ifndef OPENSSL_NO_SCTP /* SCTP stuff */ #define BIO_CTRL_DGRAM_SCTP_SET_IN_HANDSHAKE 50 @@ -610,6 +612,8 @@ int BIO_ctrl_reset_read_request(BIO *b); (int)BIO_ctrl(b, BIO_CTRL_DGRAM_GET_PEER, 0, (char *)peer) #define BIO_dgram_set_peer(b,peer) \ (int)BIO_ctrl(b, BIO_CTRL_DGRAM_SET_PEER, 0, (char *)peer) +#define BIO_dgram_get_mtu_overhead(b) \ + (unsigned int)BIO_ctrl((b), BIO_CTRL_DGRAM_GET_MTU_OVERHEAD, 0, NULL) /* These two aren't currently implemented */ /* int BIO_get_ex_num(BIO *bio); */ diff --git a/main/openssl/crypto/bio/bss_dgram.c b/main/openssl/crypto/bio/bss_dgram.c index d9967e72..0decf944 100644 --- a/main/openssl/crypto/bio/bss_dgram.c +++ b/main/openssl/crypto/bio/bss_dgram.c @@ -454,6 +454,36 @@ static int dgram_write(BIO *b, const char *in, int inl) return(ret); } +static long dgram_get_mtu_overhead(bio_dgram_data *data) + { + long ret; + + switch (data->peer.sa.sa_family) + { + case AF_INET: + /* Assume this is UDP - 20 bytes for IP, 8 bytes for UDP */ + ret = 28; + break; +#if OPENSSL_USE_IPV6 + case AF_INET6: +#ifdef IN6_IS_ADDR_V4MAPPED + if (IN6_IS_ADDR_V4MAPPED(&data->peer.sa_in6.sin6_addr)) + /* Assume this is UDP - 20 bytes for IP, 8 bytes for UDP */ + ret = 28; + else +#endif + /* Assume this is UDP - 40 bytes for IP, 8 bytes for UDP */ + ret = 48; + break; +#endif + default: + /* We don't know. Go with the historical default */ + ret = 28; + break; + } + return ret; + } + static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr) { long ret=1; @@ -630,23 +660,24 @@ static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr) #endif break; case BIO_CTRL_DGRAM_GET_FALLBACK_MTU: + ret = -dgram_get_mtu_overhead(data); switch (data->peer.sa.sa_family) { case AF_INET: - ret = 576 - 20 - 8; + ret += 576; break; #if OPENSSL_USE_IPV6 case AF_INET6: #ifdef IN6_IS_ADDR_V4MAPPED if (IN6_IS_ADDR_V4MAPPED(&data->peer.sa_in6.sin6_addr)) - ret = 576 - 20 - 8; + ret += 576; else #endif - ret = 1280 - 40 - 8; + ret += 1280; break; #endif default: - ret = 576 - 20 - 8; + ret += 576; break; } break; @@ -847,6 +878,9 @@ static long dgram_ctrl(BIO *b, int cmd, long num, void *ptr) ret = 0; break; #endif + case BIO_CTRL_DGRAM_GET_MTU_OVERHEAD: + ret = dgram_get_mtu_overhead(data); + break; default: ret=0; break; @@ -893,10 +927,18 @@ BIO *BIO_new_dgram_sctp(int fd, int close_flag) /* Activate SCTP-AUTH for DATA and FORWARD-TSN chunks */ auth.sauth_chunk = OPENSSL_SCTP_DATA_CHUNK_TYPE; ret = setsockopt(fd, IPPROTO_SCTP, SCTP_AUTH_CHUNK, &auth, sizeof(struct sctp_authchunk)); - OPENSSL_assert(ret >= 0); + if (ret < 0) + { + BIO_vfree(bio); + return(NULL); + } auth.sauth_chunk = OPENSSL_SCTP_FORWARD_CUM_TSN_CHUNK_TYPE; ret = setsockopt(fd, IPPROTO_SCTP, SCTP_AUTH_CHUNK, &auth, sizeof(struct sctp_authchunk)); - OPENSSL_assert(ret >= 0); + if (ret < 0) + { + BIO_vfree(bio); + return(NULL); + } /* Test if activation was successful. When using accept(), * SCTP-AUTH has to be activated for the listening socket @@ -905,7 +947,13 @@ BIO *BIO_new_dgram_sctp(int fd, int close_flag) authchunks = OPENSSL_malloc(sockopt_len); memset(authchunks, 0, sizeof(sockopt_len)); ret = getsockopt(fd, IPPROTO_SCTP, SCTP_LOCAL_AUTH_CHUNKS, authchunks, &sockopt_len); - OPENSSL_assert(ret >= 0); + + if (ret < 0) + { + OPENSSL_free(authchunks); + BIO_vfree(bio); + return(NULL); + } for (p = (unsigned char*) authchunks->gauth_chunks; p < (unsigned char*) authchunks + sockopt_len; @@ -927,16 +975,28 @@ BIO *BIO_new_dgram_sctp(int fd, int close_flag) event.se_type = SCTP_AUTHENTICATION_EVENT; event.se_on = 1; ret = setsockopt(fd, IPPROTO_SCTP, SCTP_EVENT, &event, sizeof(struct sctp_event)); - OPENSSL_assert(ret >= 0); + if (ret < 0) + { + BIO_vfree(bio); + return(NULL); + } #else sockopt_len = (socklen_t) sizeof(struct sctp_event_subscribe); ret = getsockopt(fd, IPPROTO_SCTP, SCTP_EVENTS, &event, &sockopt_len); - OPENSSL_assert(ret >= 0); + if (ret < 0) + { + BIO_vfree(bio); + return(NULL); + } event.sctp_authentication_event = 1; ret = setsockopt(fd, IPPROTO_SCTP, SCTP_EVENTS, &event, sizeof(struct sctp_event_subscribe)); - OPENSSL_assert(ret >= 0); + if (ret < 0) + { + BIO_vfree(bio); + return(NULL); + } #endif #endif @@ -944,7 +1004,11 @@ BIO *BIO_new_dgram_sctp(int fd, int close_flag) * larger than the max record size of 2^14 + 2048 + 13 */ ret = setsockopt(fd, IPPROTO_SCTP, SCTP_PARTIAL_DELIVERY_POINT, &optval, sizeof(optval)); - OPENSSL_assert(ret >= 0); + if (ret < 0) + { + BIO_vfree(bio); + return(NULL); + } return(bio); } @@ -982,7 +1046,12 @@ static int dgram_sctp_free(BIO *a) return 0; data = (bio_dgram_sctp_data *)a->ptr; - if(data != NULL) OPENSSL_free(data); + if(data != NULL) + { + if(data->saved_message.data != NULL) + OPENSSL_free(data->saved_message.data); + OPENSSL_free(data); + } return(1); } @@ -1034,6 +1103,13 @@ static int dgram_sctp_read(BIO *b, char *out, int outl) msg.msg_flags = 0; n = recvmsg(b->num, &msg, 0); + if (n <= 0) + { + if (n < 0) + ret = n; + break; + } + if (msg.msg_controllen > 0) { for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) @@ -1073,13 +1149,6 @@ static int dgram_sctp_read(BIO *b, char *out, int outl) } } - if (n <= 0) - { - if (n < 0) - ret = n; - break; - } - if (msg.msg_flags & MSG_NOTIFICATION) { snp = (union sctp_notification*) out; @@ -1099,6 +1168,7 @@ static int dgram_sctp_read(BIO *b, char *out, int outl) dgram_sctp_write(data->saved_message.bio, data->saved_message.data, data->saved_message.length); OPENSSL_free(data->saved_message.data); + data->saved_message.data = NULL; data->saved_message.length = 0; } @@ -1109,16 +1179,28 @@ static int dgram_sctp_read(BIO *b, char *out, int outl) event.se_type = SCTP_SENDER_DRY_EVENT; event.se_on = 0; i = setsockopt(b->num, IPPROTO_SCTP, SCTP_EVENT, &event, sizeof(struct sctp_event)); - OPENSSL_assert(i >= 0); + if (i < 0) + { + ret = i; + break; + } #else eventsize = sizeof(struct sctp_event_subscribe); i = getsockopt(b->num, IPPROTO_SCTP, SCTP_EVENTS, &event, &eventsize); - OPENSSL_assert(i >= 0); + if (i < 0) + { + ret = i; + break; + } event.sctp_sender_dry_event = 0; i = setsockopt(b->num, IPPROTO_SCTP, SCTP_EVENTS, &event, sizeof(struct sctp_event_subscribe)); - OPENSSL_assert(i >= 0); + if (i < 0) + { + ret = i; + break; + } #endif } @@ -1151,8 +1233,8 @@ static int dgram_sctp_read(BIO *b, char *out, int outl) */ optlen = (socklen_t) sizeof(int); ret = getsockopt(b->num, SOL_SOCKET, SO_RCVBUF, &optval, &optlen); - OPENSSL_assert(ret >= 0); - OPENSSL_assert(optval >= 18445); + if (ret >= 0) + OPENSSL_assert(optval >= 18445); /* Test if SCTP doesn't partially deliver below * max record size (2^14 + 2048 + 13) @@ -1160,8 +1242,8 @@ static int dgram_sctp_read(BIO *b, char *out, int outl) optlen = (socklen_t) sizeof(int); ret = getsockopt(b->num, IPPROTO_SCTP, SCTP_PARTIAL_DELIVERY_POINT, &optval, &optlen); - OPENSSL_assert(ret >= 0); - OPENSSL_assert(optval >= 18445); + if (ret >= 0) + OPENSSL_assert(optval >= 18445); /* Partially delivered notification??? Probably a bug.... */ OPENSSL_assert(!(msg.msg_flags & MSG_NOTIFICATION)); @@ -1195,15 +1277,15 @@ static int dgram_sctp_read(BIO *b, char *out, int outl) authchunks = OPENSSL_malloc(optlen); memset(authchunks, 0, sizeof(optlen)); ii = getsockopt(b->num, IPPROTO_SCTP, SCTP_PEER_AUTH_CHUNKS, authchunks, &optlen); - OPENSSL_assert(ii >= 0); - for (p = (unsigned char*) authchunks->gauth_chunks; - p < (unsigned char*) authchunks + optlen; - p += sizeof(uint8_t)) - { - if (*p == OPENSSL_SCTP_DATA_CHUNK_TYPE) auth_data = 1; - if (*p == OPENSSL_SCTP_FORWARD_CUM_TSN_CHUNK_TYPE) auth_forward = 1; - } + if (ii >= 0) + for (p = (unsigned char*) authchunks->gauth_chunks; + p < (unsigned char*) authchunks + optlen; + p += sizeof(uint8_t)) + { + if (*p == OPENSSL_SCTP_DATA_CHUNK_TYPE) auth_data = 1; + if (*p == OPENSSL_SCTP_FORWARD_CUM_TSN_CHUNK_TYPE) auth_forward = 1; + } OPENSSL_free(authchunks); @@ -1258,9 +1340,11 @@ static int dgram_sctp_write(BIO *b, const char *in, int inl) if (data->save_shutdown && !BIO_dgram_sctp_wait_for_dry(b)) { data->saved_message.bio = b; - data->saved_message.length = inl; + if (data->saved_message.data) + OPENSSL_free(data->saved_message.data); data->saved_message.data = OPENSSL_malloc(inl); memcpy(data->saved_message.data, in, inl); + data->saved_message.length = inl; return inl; } @@ -1367,6 +1451,10 @@ static long dgram_sctp_ctrl(BIO *b, int cmd, long num, void *ptr) * Returns always 1. */ break; + case BIO_CTRL_DGRAM_GET_MTU_OVERHEAD: + /* We allow transport protocol fragmentation so this is irrelevant */ + ret = 0; + break; case BIO_CTRL_DGRAM_SCTP_SET_IN_HANDSHAKE: if (num > 0) data->in_handshake = 1; diff --git a/main/openssl/crypto/bn/asm/bn-mips.S b/main/openssl/crypto/bn/asm/bn-mips.S index 2e7cccb7..62136802 100644 --- a/main/openssl/crypto/bn/asm/bn-mips.S +++ b/main/openssl/crypto/bn/asm/bn-mips.S @@ -1549,21 +1549,20 @@ bn_sqr_comba8: sltu $1,$3,$24 addu $7,$25,$1 sw $3,4($4) - mflo $24 mfhi $25 - slt $3,$25,$0 - sll $25,1 - multu $13,$13 # mul_add_c(a[1],b[1],c3,c1,c2); - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $7,$24 sltu $1,$7,$24 - addu $25,$1 + multu $13,$13 # forward multiplication + addu $7,$24 + addu $1,$25 + sltu $24,$7,$24 + addu $2,$1 + addu $25,$24 + sltu $3,$2,$1 addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 + sltu $25,$2,$25 + addu $3,$25 mflo $24 mfhi $25 addu $7,$24 @@ -1574,67 +1573,65 @@ bn_sqr_comba8: sltu $1,$2,$25 addu $3,$1 sw $7,2*4($4) - mflo $24 mfhi $25 - slt $7,$25,$0 - sll $25,1 - multu $13,$14 # mul_add_c2(a[1],b[2],c1,c2,c3); - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $2,$24 sltu $1,$2,$24 - addu $25,$1 + multu $13,$14 # forward multiplication + addu $2,$24 + addu $1,$25 + sltu $24,$2,$24 + addu $3,$1 + addu $25,$24 + sltu $7,$3,$1 addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 + sltu $25,$3,$25 + addu $7,$25 mflo $24 mfhi $25 - slt $1,$25,$0 - addu $7,$1 - multu $8,$12 # mul_add_c2(a[4],b[0],c2,c3,c1); - sll $25,1 - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $2,$24 sltu $1,$2,$24 - addu $25,$1 + multu $8,$12 # forward multiplication + addu $2,$24 + addu $1,$25 + sltu $24,$2,$24 + addu $3,$1 + addu $25,$24 + sltu $1,$3,$1 addu $3,$25 - sltu $1,$3,$25 addu $7,$1 + sltu $25,$3,$25 + addu $7,$25 sw $2,3*4($4) - mflo $24 mfhi $25 - slt $2,$25,$0 - sll $25,1 - multu $15,$13 # mul_add_c2(a[3],b[1],c2,c3,c1); - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $3,$24 sltu $1,$3,$24 - addu $25,$1 + multu $15,$13 # forward multiplication + addu $3,$24 + addu $1,$25 + sltu $24,$3,$24 + addu $7,$1 + addu $25,$24 + sltu $2,$7,$1 addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 + sltu $25,$7,$25 + addu $2,$25 mflo $24 mfhi $25 - slt $1,$25,$0 - addu $2,$1 - multu $14,$14 # mul_add_c(a[2],b[2],c2,c3,c1); - sll $25,1 - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $3,$24 sltu $1,$3,$24 - addu $25,$1 + multu $14,$14 # forward multiplication + addu $3,$24 + addu $1,$25 + sltu $24,$3,$24 + addu $7,$1 + addu $25,$24 + sltu $1,$7,$1 addu $7,$25 - sltu $1,$7,$25 addu $2,$1 + sltu $25,$7,$25 + addu $2,$25 mflo $24 mfhi $25 addu $3,$24 @@ -1645,97 +1642,95 @@ bn_sqr_comba8: sltu $1,$7,$25 addu $2,$1 sw $3,4*4($4) - mflo $24 mfhi $25 - slt $3,$25,$0 - sll $25,1 - multu $13,$8 # mul_add_c2(a[1],b[4],c3,c1,c2); - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $7,$24 sltu $1,$7,$24 - addu $25,$1 + multu $13,$8 # forward multiplication + addu $7,$24 + addu $1,$25 + sltu $24,$7,$24 + addu $2,$1 + addu $25,$24 + sltu $3,$2,$1 addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 + sltu $25,$2,$25 + addu $3,$25 mflo $24 mfhi $25 - slt $1,$25,$0 - addu $3,$1 - multu $14,$15 # mul_add_c2(a[2],b[3],c3,c1,c2); - sll $25,1 - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $7,$24 sltu $1,$7,$24 - addu $25,$1 + multu $14,$15 # forward multiplication + addu $7,$24 + addu $1,$25 + sltu $24,$7,$24 + addu $2,$1 + addu $25,$24 + sltu $1,$2,$1 addu $2,$25 - sltu $1,$2,$25 addu $3,$1 + sltu $25,$2,$25 + addu $3,$25 mflo $24 mfhi $25 - slt $1,$25,$0 - multu $10,$12 # mul_add_c2(a[6],b[0],c1,c2,c3); - addu $3,$1 - sll $25,1 - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $7,$24 sltu $1,$7,$24 - addu $25,$1 + multu $10,$12 # forward multiplication + addu $7,$24 + addu $1,$25 + sltu $24,$7,$24 + addu $2,$1 + addu $25,$24 + sltu $1,$2,$1 addu $2,$25 - sltu $1,$2,$25 addu $3,$1 + sltu $25,$2,$25 + addu $3,$25 sw $7,5*4($4) - mflo $24 mfhi $25 - slt $7,$25,$0 - sll $25,1 - multu $9,$13 # mul_add_c2(a[5],b[1],c1,c2,c3); - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $2,$24 sltu $1,$2,$24 - addu $25,$1 + multu $9,$13 # forward multiplication + addu $2,$24 + addu $1,$25 + sltu $24,$2,$24 + addu $3,$1 + addu $25,$24 + sltu $7,$3,$1 addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 + sltu $25,$3,$25 + addu $7,$25 mflo $24 mfhi $25 - slt $1,$25,$0 - addu $7,$1 - multu $8,$14 # mul_add_c2(a[4],b[2],c1,c2,c3); - sll $25,1 - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $2,$24 sltu $1,$2,$24 - addu $25,$1 + multu $8,$14 # forward multiplication + addu $2,$24 + addu $1,$25 + sltu $24,$2,$24 + addu $3,$1 + addu $25,$24 + sltu $1,$3,$1 addu $3,$25 - sltu $1,$3,$25 addu $7,$1 + sltu $25,$3,$25 + addu $7,$25 mflo $24 mfhi $25 - slt $1,$25,$0 - addu $7,$1 - multu $15,$15 # mul_add_c(a[3],b[3],c1,c2,c3); - sll $25,1 - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $2,$24 sltu $1,$2,$24 - addu $25,$1 + multu $15,$15 # forward multiplication + addu $2,$24 + addu $1,$25 + sltu $24,$2,$24 + addu $3,$1 + addu $25,$24 + sltu $1,$3,$1 addu $3,$25 - sltu $1,$3,$25 addu $7,$1 + sltu $25,$3,$25 + addu $7,$25 mflo $24 mfhi $25 addu $2,$24 @@ -1746,112 +1741,110 @@ bn_sqr_comba8: sltu $1,$3,$25 addu $7,$1 sw $2,6*4($4) - mflo $24 mfhi $25 - slt $2,$25,$0 - sll $25,1 - multu $13,$10 # mul_add_c2(a[1],b[6],c2,c3,c1); - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $3,$24 sltu $1,$3,$24 - addu $25,$1 + multu $13,$10 # forward multiplication + addu $3,$24 + addu $1,$25 + sltu $24,$3,$24 + addu $7,$1 + addu $25,$24 + sltu $2,$7,$1 addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 + sltu $25,$7,$25 + addu $2,$25 mflo $24 mfhi $25 - slt $1,$25,$0 - addu $2,$1 - multu $14,$9 # mul_add_c2(a[2],b[5],c2,c3,c1); - sll $25,1 - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $3,$24 sltu $1,$3,$24 - addu $25,$1 + multu $14,$9 # forward multiplication + addu $3,$24 + addu $1,$25 + sltu $24,$3,$24 + addu $7,$1 + addu $25,$24 + sltu $1,$7,$1 addu $7,$25 - sltu $1,$7,$25 addu $2,$1 + sltu $25,$7,$25 + addu $2,$25 mflo $24 mfhi $25 - slt $1,$25,$0 - addu $2,$1 - multu $15,$8 # mul_add_c2(a[3],b[4],c2,c3,c1); - sll $25,1 - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $3,$24 sltu $1,$3,$24 - addu $25,$1 + multu $15,$8 # forward multiplication + addu $3,$24 + addu $1,$25 + sltu $24,$3,$24 + addu $7,$1 + addu $25,$24 + sltu $1,$7,$1 addu $7,$25 - sltu $1,$7,$25 addu $2,$1 + sltu $25,$7,$25 + addu $2,$25 mflo $24 mfhi $25 - slt $1,$25,$0 - addu $2,$1 - multu $11,$13 # mul_add_c2(a[7],b[1],c3,c1,c2); - sll $25,1 - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $3,$24 sltu $1,$3,$24 - addu $25,$1 + multu $11,$13 # forward multiplication + addu $3,$24 + addu $1,$25 + sltu $24,$3,$24 + addu $7,$1 + addu $25,$24 + sltu $1,$7,$1 addu $7,$25 - sltu $1,$7,$25 addu $2,$1 + sltu $25,$7,$25 + addu $2,$25 sw $3,7*4($4) - mflo $24 mfhi $25 - slt $3,$25,$0 - sll $25,1 - multu $10,$14 # mul_add_c2(a[6],b[2],c3,c1,c2); - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $7,$24 sltu $1,$7,$24 - addu $25,$1 + multu $10,$14 # forward multiplication + addu $7,$24 + addu $1,$25 + sltu $24,$7,$24 + addu $2,$1 + addu $25,$24 + sltu $3,$2,$1 addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 + sltu $25,$2,$25 + addu $3,$25 mflo $24 mfhi $25 - slt $1,$25,$0 - addu $3,$1 - multu $9,$15 # mul_add_c2(a[5],b[3],c3,c1,c2); - sll $25,1 - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $7,$24 sltu $1,$7,$24 - addu $25,$1 + multu $9,$15 # forward multiplication + addu $7,$24 + addu $1,$25 + sltu $24,$7,$24 + addu $2,$1 + addu $25,$24 + sltu $1,$2,$1 addu $2,$25 - sltu $1,$2,$25 addu $3,$1 + sltu $25,$2,$25 + addu $3,$25 mflo $24 mfhi $25 - slt $1,$25,$0 - addu $3,$1 - multu $8,$8 # mul_add_c(a[4],b[4],c3,c1,c2); - sll $25,1 - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $7,$24 sltu $1,$7,$24 - addu $25,$1 + multu $8,$8 # forward multiplication + addu $7,$24 + addu $1,$25 + sltu $24,$7,$24 + addu $2,$1 + addu $25,$24 + sltu $1,$2,$1 addu $2,$25 - sltu $1,$2,$25 addu $3,$1 + sltu $25,$2,$25 + addu $3,$25 mflo $24 mfhi $25 addu $7,$24 @@ -1862,82 +1855,80 @@ bn_sqr_comba8: sltu $1,$2,$25 addu $3,$1 sw $7,8*4($4) - mflo $24 mfhi $25 - slt $7,$25,$0 - sll $25,1 - multu $15,$10 # mul_add_c2(a[3],b[6],c1,c2,c3); - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $2,$24 sltu $1,$2,$24 - addu $25,$1 + multu $15,$10 # forward multiplication + addu $2,$24 + addu $1,$25 + sltu $24,$2,$24 + addu $3,$1 + addu $25,$24 + sltu $7,$3,$1 addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 + sltu $25,$3,$25 + addu $7,$25 mflo $24 mfhi $25 - slt $1,$25,$0 - addu $7,$1 - multu $8,$9 # mul_add_c2(a[4],b[5],c1,c2,c3); - sll $25,1 - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $2,$24 sltu $1,$2,$24 - addu $25,$1 + multu $8,$9 # forward multiplication + addu $2,$24 + addu $1,$25 + sltu $24,$2,$24 + addu $3,$1 + addu $25,$24 + sltu $1,$3,$1 addu $3,$25 - sltu $1,$3,$25 addu $7,$1 + sltu $25,$3,$25 + addu $7,$25 mflo $24 mfhi $25 - slt $1,$25,$0 - addu $7,$1 - multu $11,$15 # mul_add_c2(a[7],b[3],c2,c3,c1); - sll $25,1 - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $2,$24 sltu $1,$2,$24 - addu $25,$1 + multu $11,$15 # forward multiplication + addu $2,$24 + addu $1,$25 + sltu $24,$2,$24 + addu $3,$1 + addu $25,$24 + sltu $1,$3,$1 addu $3,$25 - sltu $1,$3,$25 addu $7,$1 + sltu $25,$3,$25 + addu $7,$25 sw $2,9*4($4) - mflo $24 mfhi $25 - slt $2,$25,$0 - sll $25,1 - multu $10,$8 # mul_add_c2(a[6],b[4],c2,c3,c1); - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $3,$24 sltu $1,$3,$24 - addu $25,$1 + multu $10,$8 # forward multiplication + addu $3,$24 + addu $1,$25 + sltu $24,$3,$24 + addu $7,$1 + addu $25,$24 + sltu $2,$7,$1 addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 + sltu $25,$7,$25 + addu $2,$25 mflo $24 mfhi $25 - slt $1,$25,$0 - addu $2,$1 - multu $9,$9 # mul_add_c(a[5],b[5],c2,c3,c1); - sll $25,1 - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $3,$24 sltu $1,$3,$24 - addu $25,$1 + multu $9,$9 # forward multiplication + addu $3,$24 + addu $1,$25 + sltu $24,$3,$24 + addu $7,$1 + addu $25,$24 + sltu $1,$7,$1 addu $7,$25 - sltu $1,$7,$25 addu $2,$1 + sltu $25,$7,$25 + addu $2,$25 mflo $24 mfhi $25 addu $3,$24 @@ -1948,52 +1939,50 @@ bn_sqr_comba8: sltu $1,$7,$25 addu $2,$1 sw $3,10*4($4) - mflo $24 mfhi $25 - slt $3,$25,$0 - sll $25,1 - multu $9,$10 # mul_add_c2(a[5],b[6],c3,c1,c2); - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $7,$24 sltu $1,$7,$24 - addu $25,$1 + multu $9,$10 # forward multiplication + addu $7,$24 + addu $1,$25 + sltu $24,$7,$24 + addu $2,$1 + addu $25,$24 + sltu $3,$2,$1 addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 + sltu $25,$2,$25 + addu $3,$25 mflo $24 mfhi $25 - slt $1,$25,$0 - addu $3,$1 - multu $11,$9 # mul_add_c2(a[7],b[5],c1,c2,c3); - sll $25,1 - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $7,$24 sltu $1,$7,$24 - addu $25,$1 + multu $11,$9 # forward multiplication + addu $7,$24 + addu $1,$25 + sltu $24,$7,$24 + addu $2,$1 + addu $25,$24 + sltu $1,$2,$1 addu $2,$25 - sltu $1,$2,$25 addu $3,$1 + sltu $25,$2,$25 + addu $3,$25 sw $7,11*4($4) - mflo $24 mfhi $25 - slt $7,$25,$0 - sll $25,1 - multu $10,$10 # mul_add_c(a[6],b[6],c1,c2,c3); - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $2,$24 sltu $1,$2,$24 - addu $25,$1 + multu $10,$10 # forward multiplication + addu $2,$24 + addu $1,$25 + sltu $24,$2,$24 + addu $3,$1 + addu $25,$24 + sltu $7,$3,$1 addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 + sltu $25,$3,$25 + addu $7,$25 mflo $24 mfhi $25 addu $2,$24 @@ -2004,21 +1993,20 @@ bn_sqr_comba8: sltu $1,$3,$25 addu $7,$1 sw $2,12*4($4) - mflo $24 mfhi $25 - slt $2,$25,$0 - sll $25,1 - multu $11,$11 # mul_add_c(a[7],b[7],c3,c1,c2); - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $3,$24 sltu $1,$3,$24 - addu $25,$1 + multu $11,$11 # forward multiplication + addu $3,$24 + addu $1,$25 + sltu $24,$3,$24 + addu $7,$1 + addu $25,$24 + sltu $2,$7,$1 addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 + sltu $25,$7,$25 + addu $2,$25 sw $3,13*4($4) mflo $24 @@ -2062,21 +2050,20 @@ bn_sqr_comba4: sltu $1,$3,$24 addu $7,$25,$1 sw $3,4($4) - mflo $24 mfhi $25 - slt $3,$25,$0 - sll $25,1 - multu $13,$13 # mul_add_c(a[1],b[1],c3,c1,c2); - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $7,$24 sltu $1,$7,$24 - addu $25,$1 + multu $13,$13 # forward multiplication + addu $7,$24 + addu $1,$25 + sltu $24,$7,$24 + addu $2,$1 + addu $25,$24 + sltu $3,$2,$1 addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 + sltu $25,$2,$25 + addu $3,$25 mflo $24 mfhi $25 addu $7,$24 @@ -2087,52 +2074,50 @@ bn_sqr_comba4: sltu $1,$2,$25 addu $3,$1 sw $7,2*4($4) - mflo $24 mfhi $25 - slt $7,$25,$0 - sll $25,1 - multu $13,$14 # mul_add_c(a2[1],b[2],c1,c2,c3); - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $2,$24 sltu $1,$2,$24 - addu $25,$1 + multu $13,$14 # forward multiplication + addu $2,$24 + addu $1,$25 + sltu $24,$2,$24 + addu $3,$1 + addu $25,$24 + sltu $7,$3,$1 addu $3,$25 - sltu $1,$3,$25 - addu $7,$1 + sltu $25,$3,$25 + addu $7,$25 mflo $24 mfhi $25 - slt $1,$25,$0 - addu $7,$1 - multu $15,$13 # mul_add_c2(a[3],b[1],c2,c3,c1); - sll $25,1 - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $2,$24 sltu $1,$2,$24 - addu $25,$1 + multu $15,$13 # forward multiplication + addu $2,$24 + addu $1,$25 + sltu $24,$2,$24 + addu $3,$1 + addu $25,$24 + sltu $1,$3,$1 addu $3,$25 - sltu $1,$3,$25 addu $7,$1 + sltu $25,$3,$25 + addu $7,$25 sw $2,3*4($4) - mflo $24 mfhi $25 - slt $2,$25,$0 - sll $25,1 - multu $14,$14 # mul_add_c(a[2],b[2],c2,c3,c1); - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $3,$24 sltu $1,$3,$24 - addu $25,$1 + multu $14,$14 # forward multiplication + addu $3,$24 + addu $1,$25 + sltu $24,$3,$24 + addu $7,$1 + addu $25,$24 + sltu $2,$7,$1 addu $7,$25 - sltu $1,$7,$25 - addu $2,$1 + sltu $25,$7,$25 + addu $2,$25 mflo $24 mfhi $25 addu $3,$24 @@ -2143,21 +2128,20 @@ bn_sqr_comba4: sltu $1,$7,$25 addu $2,$1 sw $3,4*4($4) - mflo $24 mfhi $25 - slt $3,$25,$0 - sll $25,1 - multu $15,$15 # mul_add_c(a[3],b[3],c1,c2,c3); - slt $6,$24,$0 - addu $25,$6 - sll $24,1 addu $7,$24 sltu $1,$7,$24 - addu $25,$1 + multu $15,$15 # forward multiplication + addu $7,$24 + addu $1,$25 + sltu $24,$7,$24 + addu $2,$1 + addu $25,$24 + sltu $3,$2,$1 addu $2,$25 - sltu $1,$2,$25 - addu $3,$1 + sltu $25,$2,$25 + addu $3,$25 sw $7,5*4($4) mflo $24 diff --git a/main/openssl/crypto/bn/asm/mips.pl b/main/openssl/crypto/bn/asm/mips.pl index d2f3ef7b..215c9a74 100644 --- a/main/openssl/crypto/bn/asm/mips.pl +++ b/main/openssl/crypto/bn/asm/mips.pl @@ -1872,6 +1872,41 @@ ___ ($a_4,$a_5,$a_6,$a_7)=($b_0,$b_1,$b_2,$b_3); +sub add_c2 () { +my ($hi,$lo,$c0,$c1,$c2, + $warm, # !$warm denotes first call with specific sequence of + # $c_[XYZ] when there is no Z-carry to accumulate yet; + $an,$bn # these two are arguments for multiplication which + # result is used in *next* step [which is why it's + # commented as "forward multiplication" below]; + )=@_; +$code.=<<___; + mflo $lo + mfhi $hi + $ADDU $c0,$lo + sltu $at,$c0,$lo + $MULTU $an,$bn # forward multiplication + $ADDU $c0,$lo + $ADDU $at,$hi + sltu $lo,$c0,$lo + $ADDU $c1,$at + $ADDU $hi,$lo +___ +$code.=<<___ if (!$warm); + sltu $c2,$c1,$at + $ADDU $c1,$hi + sltu $hi,$c1,$hi + $ADDU $c2,$hi +___ +$code.=<<___ if ($warm); + sltu $at,$c1,$at + $ADDU $c1,$hi + $ADDU $c2,$at + sltu $hi,$c1,$hi + $ADDU $c2,$hi +___ +} + $code.=<<___; .align 5 @@ -1920,21 +1955,10 @@ $code.=<<___; sltu $at,$c_2,$t_1 $ADDU $c_3,$t_2,$at $ST $c_2,$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_2,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at +___ + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, + $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); +$code.=<<___; mflo $t_1 mfhi $t_2 $ADDU $c_3,$t_1 @@ -1945,67 +1969,19 @@ $code.=<<___; sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,2*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_3,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_1,$a_2 # mul_add_c2(a[1],b[2],c1,c2,c3); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_3,$at - $MULTU $a_4,$a_0 # mul_add_c2(a[4],b[0],c2,c3,c1); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at +___ + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, + $a_1,$a_2); # mul_add_c2(a[1],b[2],c1,c2,c3); + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, + $a_4,$a_0); # mul_add_c2(a[4],b[0],c2,c3,c1); +$code.=<<___; $ST $c_1,3*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_1,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_1,$at - $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at +___ + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, + $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1); + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, + $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); +$code.=<<___; mflo $t_1 mfhi $t_2 $ADDU $c_2,$t_1 @@ -2016,97 +1992,23 @@ $code.=<<___; sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,4*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_2,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_1,$a_4 # mul_add_c2(a[1],b[4],c3,c1,c2); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_2,$at - $MULTU $a_2,$a_3 # mul_add_c2(a[2],b[3],c3,c1,c2); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $MULTU $a_6,$a_0 # mul_add_c2(a[6],b[0],c1,c2,c3); - $ADDU $c_2,$at - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at +___ + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, + $a_1,$a_4); # mul_add_c2(a[1],b[4],c3,c1,c2); + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, + $a_2,$a_3); # mul_add_c2(a[2],b[3],c3,c1,c2); + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, + $a_6,$a_0); # mul_add_c2(a[6],b[0],c1,c2,c3); +$code.=<<___; $ST $c_3,5*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_3,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_5,$a_1 # mul_add_c2(a[5],b[1],c1,c2,c3); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_3,$at - $MULTU $a_4,$a_2 # mul_add_c2(a[4],b[2],c1,c2,c3); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_3,$at - $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at +___ + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, + $a_5,$a_1); # mul_add_c2(a[5],b[1],c1,c2,c3); + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, + $a_4,$a_2); # mul_add_c2(a[4],b[2],c1,c2,c3); + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, + $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); +$code.=<<___; mflo $t_1 mfhi $t_2 $ADDU $c_1,$t_1 @@ -2117,112 +2019,25 @@ $code.=<<___; sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,6*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_1,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_1,$a_6 # mul_add_c2(a[1],b[6],c2,c3,c1); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_1,$at - $MULTU $a_2,$a_5 # mul_add_c2(a[2],b[5],c2,c3,c1); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_1,$at - $MULTU $a_3,$a_4 # mul_add_c2(a[3],b[4],c2,c3,c1); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_1,$at - $MULTU $a_7,$a_1 # mul_add_c2(a[7],b[1],c3,c1,c2); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at +___ + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, + $a_1,$a_6); # mul_add_c2(a[1],b[6],c2,c3,c1); + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, + $a_2,$a_5); # mul_add_c2(a[2],b[5],c2,c3,c1); + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, + $a_3,$a_4); # mul_add_c2(a[3],b[4],c2,c3,c1); + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, + $a_7,$a_1); # mul_add_c2(a[7],b[1],c3,c1,c2); +$code.=<<___; $ST $c_2,7*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_2,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_6,$a_2 # mul_add_c2(a[6],b[2],c3,c1,c2); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_2,$at - $MULTU $a_5,$a_3 # mul_add_c2(a[5],b[3],c3,c1,c2); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_2,$at - $MULTU $a_4,$a_4 # mul_add_c(a[4],b[4],c3,c1,c2); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at +___ + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, + $a_6,$a_2); # mul_add_c2(a[6],b[2],c3,c1,c2); + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, + $a_5,$a_3); # mul_add_c2(a[5],b[3],c3,c1,c2); + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, + $a_4,$a_4); # mul_add_c(a[4],b[4],c3,c1,c2); +$code.=<<___; mflo $t_1 mfhi $t_2 $ADDU $c_3,$t_1 @@ -2233,82 +2048,21 @@ $code.=<<___; sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,8*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_3,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_3,$a_6 # mul_add_c2(a[3],b[6],c1,c2,c3); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_3,$at - $MULTU $a_4,$a_5 # mul_add_c2(a[4],b[5],c1,c2,c3); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_3,$at - $MULTU $a_7,$a_3 # mul_add_c2(a[7],b[3],c2,c3,c1); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at +___ + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, + $a_3,$a_6); # mul_add_c2(a[3],b[6],c1,c2,c3); + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, + $a_4,$a_5); # mul_add_c2(a[4],b[5],c1,c2,c3); + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, + $a_7,$a_3); # mul_add_c2(a[7],b[3],c2,c3,c1); +$code.=<<___; $ST $c_1,9*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_1,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_6,$a_4 # mul_add_c2(a[6],b[4],c2,c3,c1); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_1,$at - $MULTU $a_5,$a_5 # mul_add_c(a[5],b[5],c2,c3,c1); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at +___ + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, + $a_6,$a_4); # mul_add_c2(a[6],b[4],c2,c3,c1); + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,1, + $a_5,$a_5); # mul_add_c(a[5],b[5],c2,c3,c1); +$code.=<<___; mflo $t_1 mfhi $t_2 $ADDU $c_2,$t_1 @@ -2319,52 +2073,17 @@ $code.=<<___; sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,10*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_2,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_5,$a_6 # mul_add_c2(a[5],b[6],c3,c1,c2); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_2,$at - $MULTU $a_7,$a_5 # mul_add_c2(a[7],b[5],c1,c2,c3); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at +___ + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, + $a_5,$a_6); # mul_add_c2(a[5],b[6],c3,c1,c2); + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,1, + $a_7,$a_5); # mul_add_c2(a[7],b[5],c1,c2,c3); +$code.=<<___; $ST $c_3,11*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_3,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_6,$a_6 # mul_add_c(a[6],b[6],c1,c2,c3); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at +___ + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, + $a_6,$a_6); # mul_add_c(a[6],b[6],c1,c2,c3); +$code.=<<___; mflo $t_1 mfhi $t_2 $ADDU $c_1,$t_1 @@ -2375,21 +2094,10 @@ $code.=<<___; sltu $at,$c_2,$t_2 $ADDU $c_3,$at $ST $c_1,12*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_1,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_7,$a_7 # mul_add_c(a[7],b[7],c3,c1,c2); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at +___ + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, + $a_7,$a_7); # mul_add_c(a[7],b[7],c3,c1,c2); +$code.=<<___; $ST $c_2,13*$BNSZ($a0) mflo $t_1 @@ -2457,21 +2165,10 @@ $code.=<<___; sltu $at,$c_2,$t_1 $ADDU $c_3,$t_2,$at $ST $c_2,$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_2,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_1,$a_1 # mul_add_c(a[1],b[1],c3,c1,c2); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at +___ + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, + $a_1,$a_1); # mul_add_c(a[1],b[1],c3,c1,c2); +$code.=<<___; mflo $t_1 mfhi $t_2 $ADDU $c_3,$t_1 @@ -2482,52 +2179,17 @@ $code.=<<___; sltu $at,$c_1,$t_2 $ADDU $c_2,$at $ST $c_3,2*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_3,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_1,$a_2 # mul_add_c(a2[1],b[2],c1,c2,c3); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at - mflo $t_1 - mfhi $t_2 - slt $at,$t_2,$zero - $ADDU $c_3,$at - $MULTU $a_3,$a_1 # mul_add_c2(a[3],b[1],c2,c3,c1); - $SLL $t_2,1 - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_1,$t_1 - sltu $at,$c_1,$t_1 - $ADDU $t_2,$at - $ADDU $c_2,$t_2 - sltu $at,$c_2,$t_2 - $ADDU $c_3,$at +___ + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,0, + $a_1,$a_2); # mul_add_c2(a2[1],b[2],c1,c2,c3); + &add_c2($t_2,$t_1,$c_1,$c_2,$c_3,1, + $a_3,$a_1); # mul_add_c2(a[3],b[1],c2,c3,c1); +$code.=<<___; $ST $c_1,3*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_1,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_2,$a_2 # mul_add_c(a[2],b[2],c2,c3,c1); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_2,$t_1 - sltu $at,$c_2,$t_1 - $ADDU $t_2,$at - $ADDU $c_3,$t_2 - sltu $at,$c_3,$t_2 - $ADDU $c_1,$at +___ + &add_c2($t_2,$t_1,$c_2,$c_3,$c_1,0, + $a_2,$a_2); # mul_add_c(a[2],b[2],c2,c3,c1); +$code.=<<___; mflo $t_1 mfhi $t_2 $ADDU $c_2,$t_1 @@ -2538,21 +2200,10 @@ $code.=<<___; sltu $at,$c_3,$t_2 $ADDU $c_1,$at $ST $c_2,4*$BNSZ($a0) - - mflo $t_1 - mfhi $t_2 - slt $c_2,$t_2,$zero - $SLL $t_2,1 - $MULTU $a_3,$a_3 # mul_add_c(a[3],b[3],c1,c2,c3); - slt $a2,$t_1,$zero - $ADDU $t_2,$a2 - $SLL $t_1,1 - $ADDU $c_3,$t_1 - sltu $at,$c_3,$t_1 - $ADDU $t_2,$at - $ADDU $c_1,$t_2 - sltu $at,$c_1,$t_2 - $ADDU $c_2,$at +___ + &add_c2($t_2,$t_1,$c_3,$c_1,$c_2,0, + $a_3,$a_3); # mul_add_c(a[3],b[3],c1,c2,c3); +$code.=<<___; $ST $c_3,5*$BNSZ($a0) mflo $t_1 diff --git a/main/openssl/crypto/bn/asm/mips3.S b/main/openssl/crypto/bn/asm/mips3.S deleted file mode 100644 index dca4105c..00000000 --- a/main/openssl/crypto/bn/asm/mips3.S +++ /dev/null @@ -1,2201 +0,0 @@ -.rdata -.asciiz "mips3.s, Version 1.1" -.asciiz "MIPS III/IV ISA artwork by Andy Polyakov <appro@fy.chalmers.se>" - -/* - * ==================================================================== - * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL - * project. - * - * Rights for redistribution and usage in source and binary forms are - * granted according to the OpenSSL license. Warranty of any kind is - * disclaimed. - * ==================================================================== - */ - -/* - * This is my modest contributon to the OpenSSL project (see - * http://www.openssl.org/ for more information about it) and is - * a drop-in MIPS III/IV ISA replacement for crypto/bn/bn_asm.c - * module. For updates see http://fy.chalmers.se/~appro/hpe/. - * - * The module is designed to work with either of the "new" MIPS ABI(5), - * namely N32 or N64, offered by IRIX 6.x. It's not ment to work under - * IRIX 5.x not only because it doesn't support new ABIs but also - * because 5.x kernels put R4x00 CPU into 32-bit mode and all those - * 64-bit instructions (daddu, dmultu, etc.) found below gonna only - * cause illegal instruction exception:-( - * - * In addition the code depends on preprocessor flags set up by MIPSpro - * compiler driver (either as or cc) and therefore (probably?) can't be - * compiled by the GNU assembler. GNU C driver manages fine though... - * I mean as long as -mmips-as is specified or is the default option, - * because then it simply invokes /usr/bin/as which in turn takes - * perfect care of the preprocessor definitions. Another neat feature - * offered by the MIPSpro assembler is an optimization pass. This gave - * me the opportunity to have the code looking more regular as all those - * architecture dependent instruction rescheduling details were left to - * the assembler. Cool, huh? - * - * Performance improvement is astonishing! 'apps/openssl speed rsa dsa' - * goes way over 3 times faster! - * - * <appro@fy.chalmers.se> - */ -#include <asm.h> -#include <regdef.h> - -#if _MIPS_ISA>=4 -#define MOVNZ(cond,dst,src) \ - movn dst,src,cond -#else -#define MOVNZ(cond,dst,src) \ - .set noreorder; \ - bnezl cond,.+8; \ - move dst,src; \ - .set reorder -#endif - -.text - -.set noat -.set reorder - -#define MINUS4 v1 - -.align 5 -LEAF(bn_mul_add_words) - .set noreorder - bgtzl a2,.L_bn_mul_add_words_proceed - ld t0,0(a1) - jr ra - move v0,zero - .set reorder - -.L_bn_mul_add_words_proceed: - li MINUS4,-4 - and ta0,a2,MINUS4 - move v0,zero - beqz ta0,.L_bn_mul_add_words_tail - -.L_bn_mul_add_words_loop: - dmultu t0,a3 - ld t1,0(a0) - ld t2,8(a1) - ld t3,8(a0) - ld ta0,16(a1) - ld ta1,16(a0) - daddu t1,v0 - sltu v0,t1,v0 /* All manuals say it "compares 32-bit - * values", but it seems to work fine - * even on 64-bit registers. */ - mflo AT - mfhi t0 - daddu t1,AT - daddu v0,t0 - sltu AT,t1,AT - sd t1,0(a0) - daddu v0,AT - - dmultu t2,a3 - ld ta2,24(a1) - ld ta3,24(a0) - daddu t3,v0 - sltu v0,t3,v0 - mflo AT - mfhi t2 - daddu t3,AT - daddu v0,t2 - sltu AT,t3,AT - sd t3,8(a0) - daddu v0,AT - - dmultu ta0,a3 - subu a2,4 - PTR_ADD a0,32 - PTR_ADD a1,32 - daddu ta1,v0 - sltu v0,ta1,v0 - mflo AT - mfhi ta0 - daddu ta1,AT - daddu v0,ta0 - sltu AT,ta1,AT - sd ta1,-16(a0) - daddu v0,AT - - - dmultu ta2,a3 - and ta0,a2,MINUS4 - daddu ta3,v0 - sltu v0,ta3,v0 - mflo AT - mfhi ta2 - daddu ta3,AT - daddu v0,ta2 - sltu AT,ta3,AT - sd ta3,-8(a0) - daddu v0,AT - .set noreorder - bgtzl ta0,.L_bn_mul_add_words_loop - ld t0,0(a1) - - bnezl a2,.L_bn_mul_add_words_tail - ld t0,0(a1) - .set reorder - -.L_bn_mul_add_words_return: - jr ra - -.L_bn_mul_add_words_tail: - dmultu t0,a3 - ld t1,0(a0) - subu a2,1 - daddu t1,v0 - sltu v0,t1,v0 - mflo AT - mfhi t0 - daddu t1,AT - daddu v0,t0 - sltu AT,t1,AT - sd t1,0(a0) - daddu v0,AT - beqz a2,.L_bn_mul_add_words_return - - ld t0,8(a1) - dmultu t0,a3 - ld t1,8(a0) - subu a2,1 - daddu t1,v0 - sltu v0,t1,v0 - mflo AT - mfhi t0 - daddu t1,AT - daddu v0,t0 - sltu AT,t1,AT - sd t1,8(a0) - daddu v0,AT - beqz a2,.L_bn_mul_add_words_return - - ld t0,16(a1) - dmultu t0,a3 - ld t1,16(a0) - daddu t1,v0 - sltu v0,t1,v0 - mflo AT - mfhi t0 - daddu t1,AT - daddu v0,t0 - sltu AT,t1,AT - sd t1,16(a0) - daddu v0,AT - jr ra -END(bn_mul_add_words) - -.align 5 -LEAF(bn_mul_words) - .set noreorder - bgtzl a2,.L_bn_mul_words_proceed - ld t0,0(a1) - jr ra - move v0,zero - .set reorder - -.L_bn_mul_words_proceed: - li MINUS4,-4 - and ta0,a2,MINUS4 - move v0,zero - beqz ta0,.L_bn_mul_words_tail - -.L_bn_mul_words_loop: - dmultu t0,a3 - ld t2,8(a1) - ld ta0,16(a1) - ld ta2,24(a1) - mflo AT - mfhi t0 - daddu v0,AT - sltu t1,v0,AT - sd v0,0(a0) - daddu v0,t1,t0 - - dmultu t2,a3 - subu a2,4 - PTR_ADD a0,32 - PTR_ADD a1,32 - mflo AT - mfhi t2 - daddu v0,AT - sltu t3,v0,AT - sd v0,-24(a0) - daddu v0,t3,t2 - - dmultu ta0,a3 - mflo AT - mfhi ta0 - daddu v0,AT - sltu ta1,v0,AT - sd v0,-16(a0) - daddu v0,ta1,ta0 - - - dmultu ta2,a3 - and ta0,a2,MINUS4 - mflo AT - mfhi ta2 - daddu v0,AT - sltu ta3,v0,AT - sd v0,-8(a0) - daddu v0,ta3,ta2 - .set noreorder - bgtzl ta0,.L_bn_mul_words_loop - ld t0,0(a1) - - bnezl a2,.L_bn_mul_words_tail - ld t0,0(a1) - .set reorder - -.L_bn_mul_words_return: - jr ra - -.L_bn_mul_words_tail: - dmultu t0,a3 - subu a2,1 - mflo AT - mfhi t0 - daddu v0,AT - sltu t1,v0,AT - sd v0,0(a0) - daddu v0,t1,t0 - beqz a2,.L_bn_mul_words_return - - ld t0,8(a1) - dmultu t0,a3 - subu a2,1 - mflo AT - mfhi t0 - daddu v0,AT - sltu t1,v0,AT - sd v0,8(a0) - daddu v0,t1,t0 - beqz a2,.L_bn_mul_words_return - - ld t0,16(a1) - dmultu t0,a3 - mflo AT - mfhi t0 - daddu v0,AT - sltu t1,v0,AT - sd v0,16(a0) - daddu v0,t1,t0 - jr ra -END(bn_mul_words) - -.align 5 -LEAF(bn_sqr_words) - .set noreorder - bgtzl a2,.L_bn_sqr_words_proceed - ld t0,0(a1) - jr ra - move v0,zero - .set reorder - -.L_bn_sqr_words_proceed: - li MINUS4,-4 - and ta0,a2,MINUS4 - move v0,zero - beqz ta0,.L_bn_sqr_words_tail - -.L_bn_sqr_words_loop: - dmultu t0,t0 - ld t2,8(a1) - ld ta0,16(a1) - ld ta2,24(a1) - mflo t1 - mfhi t0 - sd t1,0(a0) - sd t0,8(a0) - - dmultu t2,t2 - subu a2,4 - PTR_ADD a0,64 - PTR_ADD a1,32 - mflo t3 - mfhi t2 - sd t3,-48(a0) - sd t2,-40(a0) - - dmultu ta0,ta0 - mflo ta1 - mfhi ta0 - sd ta1,-32(a0) - sd ta0,-24(a0) - - - dmultu ta2,ta2 - and ta0,a2,MINUS4 - mflo ta3 - mfhi ta2 - sd ta3,-16(a0) - sd ta2,-8(a0) - - .set noreorder - bgtzl ta0,.L_bn_sqr_words_loop - ld t0,0(a1) - - bnezl a2,.L_bn_sqr_words_tail - ld t0,0(a1) - .set reorder - -.L_bn_sqr_words_return: - move v0,zero - jr ra - -.L_bn_sqr_words_tail: - dmultu t0,t0 - subu a2,1 - mflo t1 - mfhi t0 - sd t1,0(a0) - sd t0,8(a0) - beqz a2,.L_bn_sqr_words_return - - ld t0,8(a1) - dmultu t0,t0 - subu a2,1 - mflo t1 - mfhi t0 - sd t1,16(a0) - sd t0,24(a0) - beqz a2,.L_bn_sqr_words_return - - ld t0,16(a1) - dmultu t0,t0 - mflo t1 - mfhi t0 - sd t1,32(a0) - sd t0,40(a0) - jr ra -END(bn_sqr_words) - -.align 5 -LEAF(bn_add_words) - .set noreorder - bgtzl a3,.L_bn_add_words_proceed - ld t0,0(a1) - jr ra - move v0,zero - .set reorder - -.L_bn_add_words_proceed: - li MINUS4,-4 - and AT,a3,MINUS4 - move v0,zero - beqz AT,.L_bn_add_words_tail - -.L_bn_add_words_loop: - ld ta0,0(a2) - subu a3,4 - ld t1,8(a1) - and AT,a3,MINUS4 - ld t2,16(a1) - PTR_ADD a2,32 - ld t3,24(a1) - PTR_ADD a0,32 - ld ta1,-24(a2) - PTR_ADD a1,32 - ld ta2,-16(a2) - ld ta3,-8(a2) - daddu ta0,t0 - sltu t8,ta0,t0 - daddu t0,ta0,v0 - sltu v0,t0,ta0 - sd t0,-32(a0) - daddu v0,t8 - - daddu ta1,t1 - sltu t9,ta1,t1 - daddu t1,ta1,v0 - sltu v0,t1,ta1 - sd t1,-24(a0) - daddu v0,t9 - - daddu ta2,t2 - sltu t8,ta2,t2 - daddu t2,ta2,v0 - sltu v0,t2,ta2 - sd t2,-16(a0) - daddu v0,t8 - - daddu ta3,t3 - sltu t9,ta3,t3 - daddu t3,ta3,v0 - sltu v0,t3,ta3 - sd t3,-8(a0) - daddu v0,t9 - - .set noreorder - bgtzl AT,.L_bn_add_words_loop - ld t0,0(a1) - - bnezl a3,.L_bn_add_words_tail - ld t0,0(a1) - .set reorder - -.L_bn_add_words_return: - jr ra - -.L_bn_add_words_tail: - ld ta0,0(a2) - daddu ta0,t0 - subu a3,1 - sltu t8,ta0,t0 - daddu t0,ta0,v0 - sltu v0,t0,ta0 - sd t0,0(a0) - daddu v0,t8 - beqz a3,.L_bn_add_words_return - - ld t1,8(a1) - ld ta1,8(a2) - daddu ta1,t1 - subu a3,1 - sltu t9,ta1,t1 - daddu t1,ta1,v0 - sltu v0,t1,ta1 - sd t1,8(a0) - daddu v0,t9 - beqz a3,.L_bn_add_words_return - - ld t2,16(a1) - ld ta2,16(a2) - daddu ta2,t2 - sltu t8,ta2,t2 - daddu t2,ta2,v0 - sltu v0,t2,ta2 - sd t2,16(a0) - daddu v0,t8 - jr ra -END(bn_add_words) - -.align 5 -LEAF(bn_sub_words) - .set noreorder - bgtzl a3,.L_bn_sub_words_proceed - ld t0,0(a1) - jr ra - move v0,zero - .set reorder - -.L_bn_sub_words_proceed: - li MINUS4,-4 - and AT,a3,MINUS4 - move v0,zero - beqz AT,.L_bn_sub_words_tail - -.L_bn_sub_words_loop: - ld ta0,0(a2) - subu a3,4 - ld t1,8(a1) - and AT,a3,MINUS4 - ld t2,16(a1) - PTR_ADD a2,32 - ld t3,24(a1) - PTR_ADD a0,32 - ld ta1,-24(a2) - PTR_ADD a1,32 - ld ta2,-16(a2) - ld ta3,-8(a2) - sltu t8,t0,ta0 - dsubu t0,ta0 - dsubu ta0,t0,v0 - sd ta0,-32(a0) - MOVNZ (t0,v0,t8) - - sltu t9,t1,ta1 - dsubu t1,ta1 - dsubu ta1,t1,v0 - sd ta1,-24(a0) - MOVNZ (t1,v0,t9) - - - sltu t8,t2,ta2 - dsubu t2,ta2 - dsubu ta2,t2,v0 - sd ta2,-16(a0) - MOVNZ (t2,v0,t8) - - sltu t9,t3,ta3 - dsubu t3,ta3 - dsubu ta3,t3,v0 - sd ta3,-8(a0) - MOVNZ (t3,v0,t9) - - .set noreorder - bgtzl AT,.L_bn_sub_words_loop - ld t0,0(a1) - - bnezl a3,.L_bn_sub_words_tail - ld t0,0(a1) - .set reorder - -.L_bn_sub_words_return: - jr ra - -.L_bn_sub_words_tail: - ld ta0,0(a2) - subu a3,1 - sltu t8,t0,ta0 - dsubu t0,ta0 - dsubu ta0,t0,v0 - MOVNZ (t0,v0,t8) - sd ta0,0(a0) - beqz a3,.L_bn_sub_words_return - - ld t1,8(a1) - subu a3,1 - ld ta1,8(a2) - sltu t9,t1,ta1 - dsubu t1,ta1 - dsubu ta1,t1,v0 - MOVNZ (t1,v0,t9) - sd ta1,8(a0) - beqz a3,.L_bn_sub_words_return - - ld t2,16(a1) - ld ta2,16(a2) - sltu t8,t2,ta2 - dsubu t2,ta2 - dsubu ta2,t2,v0 - MOVNZ (t2,v0,t8) - sd ta2,16(a0) - jr ra -END(bn_sub_words) - -#undef MINUS4 - -.align 5 -LEAF(bn_div_3_words) - .set reorder - move a3,a0 /* we know that bn_div_words doesn't - * touch a3, ta2, ta3 and preserves a2 - * so that we can save two arguments - * and return address in registers - * instead of stack:-) - */ - ld a0,(a3) - move ta2,a1 - ld a1,-8(a3) - bne a0,a2,.L_bn_div_3_words_proceed - li v0,-1 - jr ra -.L_bn_div_3_words_proceed: - move ta3,ra - bal bn_div_words - move ra,ta3 - dmultu ta2,v0 - ld t2,-16(a3) - move ta0,zero - mfhi t1 - mflo t0 - sltu t8,t1,v1 -.L_bn_div_3_words_inner_loop: - bnez t8,.L_bn_div_3_words_inner_loop_done - sgeu AT,t2,t0 - seq t9,t1,v1 - and AT,t9 - sltu t3,t0,ta2 - daddu v1,a2 - dsubu t1,t3 - dsubu t0,ta2 - sltu t8,t1,v1 - sltu ta0,v1,a2 - or t8,ta0 - .set noreorder - beqzl AT,.L_bn_div_3_words_inner_loop - dsubu v0,1 - .set reorder -.L_bn_div_3_words_inner_loop_done: - jr ra -END(bn_div_3_words) - -.align 5 -LEAF(bn_div_words) - .set noreorder - bnezl a2,.L_bn_div_words_proceed - move v1,zero - jr ra - li v0,-1 /* I'd rather signal div-by-zero - * which can be done with 'break 7' */ - -.L_bn_div_words_proceed: - bltz a2,.L_bn_div_words_body - move t9,v1 - dsll a2,1 - bgtz a2,.-4 - addu t9,1 - - .set reorder - negu t1,t9 - li t2,-1 - dsll t2,t1 - and t2,a0 - dsrl AT,a1,t1 - .set noreorder - bnezl t2,.+8 - break 6 /* signal overflow */ - .set reorder - dsll a0,t9 - dsll a1,t9 - or a0,AT - -#define QT ta0 -#define HH ta1 -#define DH v1 -.L_bn_div_words_body: - dsrl DH,a2,32 - sgeu AT,a0,a2 - .set noreorder - bnezl AT,.+8 - dsubu a0,a2 - .set reorder - - li QT,-1 - dsrl HH,a0,32 - dsrl QT,32 /* q=0xffffffff */ - beq DH,HH,.L_bn_div_words_skip_div1 - ddivu zero,a0,DH - mflo QT -.L_bn_div_words_skip_div1: - dmultu a2,QT - dsll t3,a0,32 - dsrl AT,a1,32 - or t3,AT - mflo t0 - mfhi t1 -.L_bn_div_words_inner_loop1: - sltu t2,t3,t0 - seq t8,HH,t1 - sltu AT,HH,t1 - and t2,t8 - sltu v0,t0,a2 - or AT,t2 - .set noreorder - beqz AT,.L_bn_div_words_inner_loop1_done - dsubu t1,v0 - dsubu t0,a2 - b .L_bn_div_words_inner_loop1 - dsubu QT,1 - .set reorder -.L_bn_div_words_inner_loop1_done: - - dsll a1,32 - dsubu a0,t3,t0 - dsll v0,QT,32 - - li QT,-1 - dsrl HH,a0,32 - dsrl QT,32 /* q=0xffffffff */ - beq DH,HH,.L_bn_div_words_skip_div2 - ddivu zero,a0,DH - mflo QT -.L_bn_div_words_skip_div2: -#undef DH - dmultu a2,QT - dsll t3,a0,32 - dsrl AT,a1,32 - or t3,AT - mflo t0 - mfhi t1 -.L_bn_div_words_inner_loop2: - sltu t2,t3,t0 - seq t8,HH,t1 - sltu AT,HH,t1 - and t2,t8 - sltu v1,t0,a2 - or AT,t2 - .set noreorder - beqz AT,.L_bn_div_words_inner_loop2_done - dsubu t1,v1 - dsubu t0,a2 - b .L_bn_div_words_inner_loop2 - dsubu QT,1 - .set reorder -.L_bn_div_words_inner_loop2_done: -#undef HH - - dsubu a0,t3,t0 - or v0,QT - dsrl v1,a0,t9 /* v1 contains remainder if anybody wants it */ - dsrl a2,t9 /* restore a2 */ - jr ra -#undef QT -END(bn_div_words) - -#define a_0 t0 -#define a_1 t1 -#define a_2 t2 -#define a_3 t3 -#define b_0 ta0 -#define b_1 ta1 -#define b_2 ta2 -#define b_3 ta3 - -#define a_4 s0 -#define a_5 s2 -#define a_6 s4 -#define a_7 a1 /* once we load a[7] we don't need a anymore */ -#define b_4 s1 -#define b_5 s3 -#define b_6 s5 -#define b_7 a2 /* once we load b[7] we don't need b anymore */ - -#define t_1 t8 -#define t_2 t9 - -#define c_1 v0 -#define c_2 v1 -#define c_3 a3 - -#define FRAME_SIZE 48 - -.align 5 -LEAF(bn_mul_comba8) - .set noreorder - PTR_SUB sp,FRAME_SIZE - .frame sp,64,ra - .set reorder - ld a_0,0(a1) /* If compiled with -mips3 option on - * R5000 box assembler barks on this - * line with "shouldn't have mult/div - * as last instruction in bb (R10K - * bug)" warning. If anybody out there - * has a clue about how to circumvent - * this do send me a note. - * <appro@fy.chalmers.se> - */ - ld b_0,0(a2) - ld a_1,8(a1) - ld a_2,16(a1) - ld a_3,24(a1) - ld b_1,8(a2) - ld b_2,16(a2) - ld b_3,24(a2) - dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ - sd s0,0(sp) - sd s1,8(sp) - sd s2,16(sp) - sd s3,24(sp) - sd s4,32(sp) - sd s5,40(sp) - mflo c_1 - mfhi c_2 - - dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */ - ld a_4,32(a1) - ld a_5,40(a1) - ld a_6,48(a1) - ld a_7,56(a1) - ld b_4,32(a2) - ld b_5,40(a2) - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu c_3,t_2,AT - dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */ - ld b_6,48(a2) - ld b_7,56(a2) - sd c_1,0(a0) /* r[0]=c1; */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - sd c_2,8(a0) /* r[1]=c2; */ - - dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu c_2,c_1,t_2 - dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,16(a0) /* r[2]=c3; */ - - dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu c_3,c_2,t_2 - dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,24(a0) /* r[3]=c1; */ - - dmultu a_4,b_0 /* mul_add_c(a[4],b[0],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_0,b_4 /* mul_add_c(a[0],b[4],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,32(a0) /* r[4]=c2; */ - - dmultu a_0,b_5 /* mul_add_c(a[0],b[5],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu c_2,c_1,t_2 - dmultu a_1,b_4 /* mul_add_c(a[1],b[4],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_4,b_1 /* mul_add_c(a[4],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_5,b_0 /* mul_add_c(a[5],b[0],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,40(a0) /* r[5]=c3; */ - - dmultu a_6,b_0 /* mul_add_c(a[6],b[0],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu c_3,c_2,t_2 - dmultu a_5,b_1 /* mul_add_c(a[5],b[1],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_4,b_2 /* mul_add_c(a[4],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_2,b_4 /* mul_add_c(a[2],b[4],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_1,b_5 /* mul_add_c(a[1],b[5],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_0,b_6 /* mul_add_c(a[0],b[6],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,48(a0) /* r[6]=c1; */ - - dmultu a_0,b_7 /* mul_add_c(a[0],b[7],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - dmultu a_1,b_6 /* mul_add_c(a[1],b[6],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_2,b_5 /* mul_add_c(a[2],b[5],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_3,b_4 /* mul_add_c(a[3],b[4],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_4,b_3 /* mul_add_c(a[4],b[3],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_5,b_2 /* mul_add_c(a[5],b[2],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_6,b_1 /* mul_add_c(a[6],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_7,b_0 /* mul_add_c(a[7],b[0],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,56(a0) /* r[7]=c2; */ - - dmultu a_7,b_1 /* mul_add_c(a[7],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu c_2,c_1,t_2 - dmultu a_6,b_2 /* mul_add_c(a[6],b[2],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_5,b_3 /* mul_add_c(a[5],b[3],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_4,b_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_3,b_5 /* mul_add_c(a[3],b[5],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_2,b_6 /* mul_add_c(a[2],b[6],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_1,b_7 /* mul_add_c(a[1],b[7],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,64(a0) /* r[8]=c3; */ - - dmultu a_2,b_7 /* mul_add_c(a[2],b[7],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu c_3,c_2,t_2 - dmultu a_3,b_6 /* mul_add_c(a[3],b[6],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_4,b_5 /* mul_add_c(a[4],b[5],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_5,b_4 /* mul_add_c(a[5],b[4],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_6,b_3 /* mul_add_c(a[6],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_7,b_2 /* mul_add_c(a[7],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,72(a0) /* r[9]=c1; */ - - dmultu a_7,b_3 /* mul_add_c(a[7],b[3],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - dmultu a_6,b_4 /* mul_add_c(a[6],b[4],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_5,b_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_4,b_6 /* mul_add_c(a[4],b[6],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_3,b_7 /* mul_add_c(a[3],b[7],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,80(a0) /* r[10]=c2; */ - - dmultu a_4,b_7 /* mul_add_c(a[4],b[7],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu c_2,c_1,t_2 - dmultu a_5,b_6 /* mul_add_c(a[5],b[6],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_6,b_5 /* mul_add_c(a[6],b[5],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_7,b_4 /* mul_add_c(a[7],b[4],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,88(a0) /* r[11]=c3; */ - - dmultu a_7,b_5 /* mul_add_c(a[7],b[5],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu c_3,c_2,t_2 - dmultu a_6,b_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_5,b_7 /* mul_add_c(a[5],b[7],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,96(a0) /* r[12]=c1; */ - - dmultu a_6,b_7 /* mul_add_c(a[6],b[7],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - dmultu a_7,b_6 /* mul_add_c(a[7],b[6],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,104(a0) /* r[13]=c2; */ - - dmultu a_7,b_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ - ld s0,0(sp) - ld s1,8(sp) - ld s2,16(sp) - ld s3,24(sp) - ld s4,32(sp) - ld s5,40(sp) - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sd c_3,112(a0) /* r[14]=c3; */ - sd c_1,120(a0) /* r[15]=c1; */ - - PTR_ADD sp,FRAME_SIZE - - jr ra -END(bn_mul_comba8) - -.align 5 -LEAF(bn_mul_comba4) - .set reorder - ld a_0,0(a1) - ld b_0,0(a2) - ld a_1,8(a1) - ld a_2,16(a1) - dmultu a_0,b_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ - ld a_3,24(a1) - ld b_1,8(a2) - ld b_2,16(a2) - ld b_3,24(a2) - mflo c_1 - mfhi c_2 - sd c_1,0(a0) - - dmultu a_0,b_1 /* mul_add_c(a[0],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu c_3,t_2,AT - dmultu a_1,b_0 /* mul_add_c(a[1],b[0],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - sd c_2,8(a0) - - dmultu a_2,b_0 /* mul_add_c(a[2],b[0],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - dmultu a_1,b_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu c_2,c_1,t_2 - dmultu a_0,b_2 /* mul_add_c(a[0],b[2],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,16(a0) - - dmultu a_0,b_3 /* mul_add_c(a[0],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu c_3,c_2,t_2 - dmultu a_1,b_2 /* mul_add_c(a[1],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_2,b_1 /* mul_add_c(a[2],b[1],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_3,b_0 /* mul_add_c(a[3],b[0],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,24(a0) - - dmultu a_3,b_1 /* mul_add_c(a[3],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu c_1,c_3,t_2 - dmultu a_2,b_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_1,b_3 /* mul_add_c(a[1],b[3],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,32(a0) - - dmultu a_2,b_3 /* mul_add_c(a[2],b[3],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu c_2,c_1,t_2 - dmultu a_3,b_2 /* mul_add_c(a[3],b[2],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,40(a0) - - dmultu a_3,b_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sd c_1,48(a0) - sd c_2,56(a0) - - jr ra -END(bn_mul_comba4) - -#undef a_4 -#undef a_5 -#undef a_6 -#undef a_7 -#define a_4 b_0 -#define a_5 b_1 -#define a_6 b_2 -#define a_7 b_3 - -.align 5 -LEAF(bn_sqr_comba8) - .set reorder - ld a_0,0(a1) - ld a_1,8(a1) - ld a_2,16(a1) - ld a_3,24(a1) - - dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ - ld a_4,32(a1) - ld a_5,40(a1) - ld a_6,48(a1) - ld a_7,56(a1) - mflo c_1 - mfhi c_2 - sd c_1,0(a0) - - dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu c_3,t_2,AT - sd c_2,8(a0) - - dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt c_2,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,16(a0) - - dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt c_3,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_1,a_2 /* mul_add_c2(a[1],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_3,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,24(a0) - - dmultu a_4,a_0 /* mul_add_c2(a[4],b[0],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_1,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,32(a0) - - dmultu a_0,a_5 /* mul_add_c2(a[0],b[5],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt c_2,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_1,a_4 /* mul_add_c2(a[1],b[4],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_2,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_2,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,40(a0) - - dmultu a_6,a_0 /* mul_add_c2(a[6],b[0],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt c_3,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_5,a_1 /* mul_add_c2(a[5],b[1],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_3,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_4,a_2 /* mul_add_c2(a[4],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_3,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,48(a0) - - dmultu a_0,a_7 /* mul_add_c2(a[0],b[7],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_1,a_6 /* mul_add_c2(a[1],b[6],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_1,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_2,a_5 /* mul_add_c2(a[2],b[5],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_1,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_3,a_4 /* mul_add_c2(a[3],b[4],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_1,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,56(a0) - - dmultu a_7,a_1 /* mul_add_c2(a[7],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt c_2,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_6,a_2 /* mul_add_c2(a[6],b[2],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_2,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_5,a_3 /* mul_add_c2(a[5],b[3],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_2,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_4,a_4 /* mul_add_c(a[4],b[4],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,64(a0) - - dmultu a_2,a_7 /* mul_add_c2(a[2],b[7],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt c_3,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_3,a_6 /* mul_add_c2(a[3],b[6],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_3,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_4,a_5 /* mul_add_c2(a[4],b[5],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_3,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,72(a0) - - dmultu a_7,a_3 /* mul_add_c2(a[7],b[3],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_6,a_4 /* mul_add_c2(a[6],b[4],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_1,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_5,a_5 /* mul_add_c(a[5],b[5],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,80(a0) - - dmultu a_4,a_7 /* mul_add_c2(a[4],b[7],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt c_2,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_5,a_6 /* mul_add_c2(a[5],b[6],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_2,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,88(a0) - - dmultu a_7,a_5 /* mul_add_c2(a[7],b[5],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt c_3,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_6,a_6 /* mul_add_c(a[6],b[6],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,96(a0) - - dmultu a_6,a_7 /* mul_add_c2(a[6],b[7],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,104(a0) - - dmultu a_7,a_7 /* mul_add_c(a[7],b[7],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sd c_3,112(a0) - sd c_1,120(a0) - - jr ra -END(bn_sqr_comba8) - -.align 5 -LEAF(bn_sqr_comba4) - .set reorder - ld a_0,0(a1) - ld a_1,8(a1) - ld a_2,16(a1) - ld a_3,24(a1) - dmultu a_0,a_0 /* mul_add_c(a[0],b[0],c1,c2,c3); */ - mflo c_1 - mfhi c_2 - sd c_1,0(a0) - - dmultu a_0,a_1 /* mul_add_c2(a[0],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu c_3,t_2,AT - sd c_2,8(a0) - - dmultu a_2,a_0 /* mul_add_c2(a[2],b[0],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt c_2,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - dmultu a_1,a_1 /* mul_add_c(a[1],b[1],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,16(a0) - - dmultu a_0,a_3 /* mul_add_c2(a[0],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt c_3,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - dmultu a_1,a_2 /* mul_add_c(a2[1],b[2],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - slt AT,t_2,zero - daddu c_3,AT - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sltu AT,c_2,t_2 - daddu c_3,AT - sd c_1,24(a0) - - dmultu a_3,a_1 /* mul_add_c2(a[3],b[1],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - slt c_1,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - dmultu a_2,a_2 /* mul_add_c(a[2],b[2],c2,c3,c1); */ - mflo t_1 - mfhi t_2 - daddu c_2,t_1 - sltu AT,c_2,t_1 - daddu t_2,AT - daddu c_3,t_2 - sltu AT,c_3,t_2 - daddu c_1,AT - sd c_2,32(a0) - - dmultu a_2,a_3 /* mul_add_c2(a[2],b[3],c3,c1,c2); */ - mflo t_1 - mfhi t_2 - slt c_2,t_2,zero - dsll t_2,1 - slt a2,t_1,zero - daddu t_2,a2 - dsll t_1,1 - daddu c_3,t_1 - sltu AT,c_3,t_1 - daddu t_2,AT - daddu c_1,t_2 - sltu AT,c_1,t_2 - daddu c_2,AT - sd c_3,40(a0) - - dmultu a_3,a_3 /* mul_add_c(a[3],b[3],c1,c2,c3); */ - mflo t_1 - mfhi t_2 - daddu c_1,t_1 - sltu AT,c_1,t_1 - daddu t_2,AT - daddu c_2,t_2 - sd c_1,48(a0) - sd c_2,56(a0) - - jr ra -END(bn_sqr_comba4) diff --git a/main/openssl/crypto/bn/asm/x86_64-gcc.c b/main/openssl/crypto/bn/asm/x86_64-gcc.c index 6bcf32f6..e85ee8ce 100644 --- a/main/openssl/crypto/bn/asm/x86_64-gcc.c +++ b/main/openssl/crypto/bn/asm/x86_64-gcc.c @@ -273,6 +273,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ +/* + * Keep in mind that carrying into high part of multiplication result + * can not overflow, because it cannot be all-ones. + */ #if 0 /* original macros are kept for reference purposes */ #define mul_add_c(a,b,c0,c1,c2) { \ @@ -287,10 +291,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) BN_ULONG ta=(a),tb=(b),t0; \ t1 = BN_UMULT_HIGH(ta,tb); \ t0 = ta * tb; \ - t2 = t1+t1; c2 += (t2<t1)?1:0; \ - t1 = t0+t0; t2 += (t1<t0)?1:0; \ - c0 += t1; t2 += (c0<t1)?1:0; \ + c0 += t0; t2 = t1+((c0<t0)?1:0);\ c1 += t2; c2 += (c1<t2)?1:0; \ + c0 += t0; t1 += (c0<t0)?1:0; \ + c1 += t1; c2 += (c1<t1)?1:0; \ } #else #define mul_add_c(a,b,c0,c1,c2) do { \ @@ -328,22 +332,14 @@ BN_ULONG bn_sub_words(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) : "=a"(t1),"=d"(t2) \ : "a"(a),"m"(b) \ : "cc"); \ - asm ("addq %0,%0; adcq %2,%1" \ - : "+d"(t2),"+r"(c2) \ - : "g"(0) \ - : "cc"); \ - asm ("addq %0,%0; adcq %2,%1" \ - : "+a"(t1),"+d"(t2) \ - : "g"(0) \ - : "cc"); \ - asm ("addq %2,%0; adcq %3,%1" \ - : "+r"(c0),"+d"(t2) \ - : "a"(t1),"g"(0) \ - : "cc"); \ - asm ("addq %2,%0; adcq %3,%1" \ - : "+r"(c1),"+r"(c2) \ - : "d"(t2),"g"(0) \ - : "cc"); \ + asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ + : "+r"(c0),"+r"(c1),"+r"(c2) \ + : "r"(t1),"r"(t2),"g"(0) \ + : "cc"); \ + asm ("addq %3,%0; adcq %4,%1; adcq %5,%2" \ + : "+r"(c0),"+r"(c1),"+r"(c2) \ + : "r"(t1),"r"(t2),"g"(0) \ + : "cc"); \ } while (0) #endif diff --git a/main/openssl/crypto/bn/bn.h b/main/openssl/crypto/bn/bn.h index e776c07a..0dd8d889 100644 --- a/main/openssl/crypto/bn/bn.h +++ b/main/openssl/crypto/bn/bn.h @@ -784,7 +784,9 @@ int RAND_pseudo_bytes(unsigned char *buf,int num); #define bn_wcheck_size(bn, words) \ do { \ const BIGNUM *_bnum2 = (bn); \ - assert(words <= (_bnum2)->dmax && words >= (_bnum2)->top); \ + assert((words) <= (_bnum2)->dmax && (words) >= (_bnum2)->top); \ + /* avoid unused variable warning with NDEBUG */ \ + (void)(_bnum2); \ } while(0) #else /* !BN_DEBUG */ diff --git a/main/openssl/crypto/bn/bn_asm.c b/main/openssl/crypto/bn/bn_asm.c index c43c91cc..a33b6341 100644 --- a/main/openssl/crypto/bn/bn_asm.c +++ b/main/openssl/crypto/bn/bn_asm.c @@ -438,6 +438,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ +/* + * Keep in mind that carrying into high part of multiplication result + * can not overflow, because it cannot be all-ones. + */ #ifdef BN_LLONG #define mul_add_c(a,b,c0,c1,c2) \ t=(BN_ULLONG)a*b; \ @@ -478,10 +482,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) #define mul_add_c2(a,b,c0,c1,c2) { \ BN_ULONG ta=(a),tb=(b),t0; \ BN_UMULT_LOHI(t0,t1,ta,tb); \ - t2 = t1+t1; c2 += (t2<t1)?1:0; \ - t1 = t0+t0; t2 += (t1<t0)?1:0; \ - c0 += t1; t2 += (c0<t1)?1:0; \ + c0 += t0; t2 = t1+((c0<t0)?1:0);\ c1 += t2; c2 += (c1<t2)?1:0; \ + c0 += t0; t1 += (c0<t0)?1:0; \ + c1 += t1; c2 += (c1<t1)?1:0; \ } #define sqr_add_c(a,i,c0,c1,c2) { \ @@ -508,10 +512,10 @@ BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, int n) BN_ULONG ta=(a),tb=(b),t0; \ t1 = BN_UMULT_HIGH(ta,tb); \ t0 = ta * tb; \ - t2 = t1+t1; c2 += (t2<t1)?1:0; \ - t1 = t0+t0; t2 += (t1<t0)?1:0; \ - c0 += t1; t2 += (c0<t1)?1:0; \ + c0 += t0; t2 = t1+((c0<t0)?1:0);\ c1 += t2; c2 += (c1<t2)?1:0; \ + c0 += t0; t1 += (c0<t0)?1:0; \ + c1 += t1; c2 += (c1<t1)?1:0; \ } #define sqr_add_c(a,i,c0,c1,c2) { \ diff --git a/main/openssl/crypto/bn/bn_ctx.c b/main/openssl/crypto/bn/bn_ctx.c index 3f2256f6..90aa3aeb 100644 --- a/main/openssl/crypto/bn/bn_ctx.c +++ b/main/openssl/crypto/bn/bn_ctx.c @@ -158,7 +158,7 @@ static void ctxdbg(BN_CTX *ctx) unsigned int bnidx = 0, fpidx = 0; BN_POOL_ITEM *item = ctx->pool.head; BN_STACK *stack = &ctx->stack; - fprintf(stderr,"(%08x): ", (unsigned int)ctx); + fprintf(stderr,"(%16p): ", ctx); while(bnidx < ctx->used) { fprintf(stderr,"%03x ", item->vals[bnidx++ % BN_CTX_POOL_SIZE].dmax); diff --git a/main/openssl/crypto/bn/bn_div.c b/main/openssl/crypto/bn/bn_div.c index 7b240318..0ec90e80 100644 --- a/main/openssl/crypto/bn/bn_div.c +++ b/main/openssl/crypto/bn/bn_div.c @@ -189,15 +189,17 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, int no_branch=0; /* Invalid zero-padding would have particularly bad consequences - * in the case of 'num', so don't just rely on bn_check_top() for this one + * so don't just rely on bn_check_top() here * (bn_check_top() works only for BN_DEBUG builds) */ - if (num->top > 0 && num->d[num->top - 1] == 0) + if ((num->top > 0 && num->d[num->top - 1] == 0) || + (divisor->top > 0 && divisor->d[divisor->top - 1] == 0)) { BNerr(BN_F_BN_DIV,BN_R_NOT_INITIALIZED); return 0; } bn_check_top(num); + bn_check_top(divisor); if ((BN_get_flags(num, BN_FLG_CONSTTIME) != 0) || (BN_get_flags(divisor, BN_FLG_CONSTTIME) != 0)) { @@ -207,7 +209,7 @@ int BN_div(BIGNUM *dv, BIGNUM *rm, const BIGNUM *num, const BIGNUM *divisor, bn_check_top(dv); bn_check_top(rm); /* bn_check_top(num); */ /* 'num' has been checked already */ - bn_check_top(divisor); + /* bn_check_top(divisor); */ /* 'divisor' has been checked already */ if (BN_is_zero(divisor)) { diff --git a/main/openssl/crypto/bn/bntest.c b/main/openssl/crypto/bn/bntest.c index 06f5954a..48bc6338 100644 --- a/main/openssl/crypto/bn/bntest.c +++ b/main/openssl/crypto/bn/bntest.c @@ -107,6 +107,7 @@ int test_mod(BIO *bp,BN_CTX *ctx); int test_mod_mul(BIO *bp,BN_CTX *ctx); int test_mod_exp(BIO *bp,BN_CTX *ctx); int test_mod_exp_mont_consttime(BIO *bp,BN_CTX *ctx); +int test_mod_exp_mont5(BIO *bp, BN_CTX *ctx); int test_exp(BIO *bp,BN_CTX *ctx); int test_gf2m_add(BIO *bp); int test_gf2m_mod(BIO *bp); @@ -249,6 +250,7 @@ int main(int argc, char *argv[]) message(out,"BN_mod_exp_mont_consttime"); if (!test_mod_exp_mont_consttime(out,ctx)) goto err; + if (!test_mod_exp_mont5(out,ctx)) goto err; (void)BIO_flush(out); message(out,"BN_exp"); @@ -676,44 +678,98 @@ int test_mul(BIO *bp) int test_sqr(BIO *bp, BN_CTX *ctx) { - BIGNUM a,c,d,e; - int i; + BIGNUM *a,*c,*d,*e; + int i, ret = 0; - BN_init(&a); - BN_init(&c); - BN_init(&d); - BN_init(&e); + a = BN_new(); + c = BN_new(); + d = BN_new(); + e = BN_new(); + if (a == NULL || c == NULL || d == NULL || e == NULL) + { + goto err; + } for (i=0; i<num0; i++) { - BN_bntest_rand(&a,40+i*10,0,0); - a.neg=rand_neg(); - BN_sqr(&c,&a,ctx); + BN_bntest_rand(a,40+i*10,0,0); + a->neg=rand_neg(); + BN_sqr(c,a,ctx); if (bp != NULL) { if (!results) { - BN_print(bp,&a); + BN_print(bp,a); BIO_puts(bp," * "); - BN_print(bp,&a); + BN_print(bp,a); BIO_puts(bp," - "); } - BN_print(bp,&c); + BN_print(bp,c); BIO_puts(bp,"\n"); } - BN_div(&d,&e,&c,&a,ctx); - BN_sub(&d,&d,&a); - if(!BN_is_zero(&d) || !BN_is_zero(&e)) - { - fprintf(stderr,"Square test failed!\n"); - return 0; - } + BN_div(d,e,c,a,ctx); + BN_sub(d,d,a); + if(!BN_is_zero(d) || !BN_is_zero(e)) + { + fprintf(stderr,"Square test failed!\n"); + goto err; + } } - BN_free(&a); - BN_free(&c); - BN_free(&d); - BN_free(&e); - return(1); + + /* Regression test for a BN_sqr overflow bug. */ + BN_hex2bn(&a, + "80000000000000008000000000000001FFFFFFFFFFFFFFFE0000000000000000"); + BN_sqr(c, a, ctx); + if (bp != NULL) + { + if (!results) + { + BN_print(bp,a); + BIO_puts(bp," * "); + BN_print(bp,a); + BIO_puts(bp," - "); + } + BN_print(bp,c); + BIO_puts(bp,"\n"); + } + BN_mul(d, a, a, ctx); + if (BN_cmp(c, d)) + { + fprintf(stderr, "Square test failed: BN_sqr and BN_mul produce " + "different results!\n"); + goto err; + } + + /* Regression test for a BN_sqr overflow bug. */ + BN_hex2bn(&a, + "80000000000000000000000080000001FFFFFFFE000000000000000000000000"); + BN_sqr(c, a, ctx); + if (bp != NULL) + { + if (!results) + { + BN_print(bp,a); + BIO_puts(bp," * "); + BN_print(bp,a); + BIO_puts(bp," - "); + } + BN_print(bp,c); + BIO_puts(bp,"\n"); + } + BN_mul(d, a, a, ctx); + if (BN_cmp(c, d)) + { + fprintf(stderr, "Square test failed: BN_sqr and BN_mul produce " + "different results!\n"); + goto err; + } + ret = 1; +err: + if (a != NULL) BN_free(a); + if (c != NULL) BN_free(c); + if (d != NULL) BN_free(d); + if (e != NULL) BN_free(e); + return ret; } int test_mont(BIO *bp, BN_CTX *ctx) @@ -1012,6 +1068,80 @@ int test_mod_exp_mont_consttime(BIO *bp, BN_CTX *ctx) return(1); } +/* Test constant-time modular exponentiation with 1024-bit inputs, + * which on x86_64 cause a different code branch to be taken. + */ +int test_mod_exp_mont5(BIO *bp, BN_CTX *ctx) + { + BIGNUM *a,*p,*m,*d,*e; + + BN_MONT_CTX *mont; + + a=BN_new(); + p=BN_new(); + m=BN_new(); + d=BN_new(); + e=BN_new(); + + mont = BN_MONT_CTX_new(); + + BN_bntest_rand(m,1024,0,1); /* must be odd for montgomery */ + /* Zero exponent */ + BN_bntest_rand(a,1024,0,0); + BN_zero(p); + if(!BN_mod_exp_mont_consttime(d,a,p,m,ctx,NULL)) + return 0; + if(!BN_is_one(d)) + { + fprintf(stderr, "Modular exponentiation test failed!\n"); + return 0; + } + /* Zero input */ + BN_bntest_rand(p,1024,0,0); + BN_zero(a); + if(!BN_mod_exp_mont_consttime(d,a,p,m,ctx,NULL)) + return 0; + if(!BN_is_zero(d)) + { + fprintf(stderr, "Modular exponentiation test failed!\n"); + return 0; + } + /* Craft an input whose Montgomery representation is 1, + * i.e., shorter than the modulus m, in order to test + * the const time precomputation scattering/gathering. + */ + BN_one(a); + BN_MONT_CTX_set(mont,m,ctx); + if(!BN_from_montgomery(e,a,mont,ctx)) + return 0; + if(!BN_mod_exp_mont_consttime(d,e,p,m,ctx,NULL)) + return 0; + if(!BN_mod_exp_simple(a,e,p,m,ctx)) + return 0; + if(BN_cmp(a,d) != 0) + { + fprintf(stderr,"Modular exponentiation test failed!\n"); + return 0; + } + /* Finally, some regular test vectors. */ + BN_bntest_rand(e,1024,0,0); + if(!BN_mod_exp_mont_consttime(d,e,p,m,ctx,NULL)) + return 0; + if(!BN_mod_exp_simple(a,e,p,m,ctx)) + return 0; + if(BN_cmp(a,d) != 0) + { + fprintf(stderr,"Modular exponentiation test failed!\n"); + return 0; + } + BN_free(a); + BN_free(p); + BN_free(m); + BN_free(d); + BN_free(e); + return(1); + } + int test_exp(BIO *bp, BN_CTX *ctx) { BIGNUM *a,*b,*d,*e,*one; diff --git a/main/openssl/crypto/constant_time_locl.h b/main/openssl/crypto/constant_time_locl.h index c0483939..8af98c16 100644 --- a/main/openssl/crypto/constant_time_locl.h +++ b/main/openssl/crypto/constant_time_locl.h @@ -129,17 +129,12 @@ static inline int constant_time_select_int(unsigned int mask, int a, int b); static inline unsigned int constant_time_msb(unsigned int a) { - return (unsigned int)((int)(a) >> (sizeof(int) * 8 - 1)); + return 0-(a >> (sizeof(a) * 8 - 1)); } static inline unsigned int constant_time_lt(unsigned int a, unsigned int b) { - unsigned int lt; - /* Case 1: msb(a) == msb(b). a < b iff the MSB of a - b is set.*/ - lt = ~(a ^ b) & (a - b); - /* Case 2: msb(a) != msb(b). a < b iff the MSB of b is set. */ - lt |= ~a & b; - return constant_time_msb(lt); + return constant_time_msb(a^((a^b)|((a-b)^b))); } static inline unsigned char constant_time_lt_8(unsigned int a, unsigned int b) @@ -149,12 +144,7 @@ static inline unsigned char constant_time_lt_8(unsigned int a, unsigned int b) static inline unsigned int constant_time_ge(unsigned int a, unsigned int b) { - unsigned int ge; - /* Case 1: msb(a) == msb(b). a >= b iff the MSB of a - b is not set.*/ - ge = ~((a ^ b) | (a - b)); - /* Case 2: msb(a) != msb(b). a >= b iff the MSB of a is set. */ - ge |= a & ~b; - return constant_time_msb(ge); + return ~constant_time_lt(a, b); } static inline unsigned char constant_time_ge_8(unsigned int a, unsigned int b) @@ -204,7 +194,7 @@ static inline unsigned char constant_time_select_8(unsigned char mask, return (unsigned char)(constant_time_select(mask, a, b)); } -inline int constant_time_select_int(unsigned int mask, int a, int b) +static inline int constant_time_select_int(unsigned int mask, int a, int b) { return (int)(constant_time_select(mask, (unsigned)(a), (unsigned)(b))); } diff --git a/main/openssl/crypto/cversion.c b/main/openssl/crypto/cversion.c index ea9f25fd..881957e8 100644 --- a/main/openssl/crypto/cversion.c +++ b/main/openssl/crypto/cversion.c @@ -69,10 +69,7 @@ const char *SSLeay_version(int t) if (t == SSLEAY_BUILT_ON) { #ifdef DATE - static char buf[sizeof(DATE)+11]; - - BIO_snprintf(buf,sizeof buf,"built on: %s",DATE); - return(buf); + return(DATE); #else return("built on: date not available"); #endif @@ -80,10 +77,7 @@ const char *SSLeay_version(int t) if (t == SSLEAY_CFLAGS) { #ifdef CFLAGS - static char buf[sizeof(CFLAGS)+11]; - - BIO_snprintf(buf,sizeof buf,"compiler: %s",CFLAGS); - return(buf); + return(CFLAGS); #else return("compiler: information not available"); #endif @@ -91,10 +85,7 @@ const char *SSLeay_version(int t) if (t == SSLEAY_PLATFORM) { #ifdef PLATFORM - static char buf[sizeof(PLATFORM)+11]; - - BIO_snprintf(buf,sizeof buf,"platform: %s", PLATFORM); - return(buf); + return(PLATFORM); #else return("platform: information not available"); #endif diff --git a/main/openssl/crypto/dsa/dsa_asn1.c b/main/openssl/crypto/dsa/dsa_asn1.c index 60585343..473af873 100644 --- a/main/openssl/crypto/dsa/dsa_asn1.c +++ b/main/openssl/crypto/dsa/dsa_asn1.c @@ -176,13 +176,25 @@ int DSA_verify(int type, const unsigned char *dgst, int dgst_len, const unsigned char *sigbuf, int siglen, DSA *dsa) { DSA_SIG *s; + const unsigned char *p = sigbuf; + unsigned char *der = NULL; + int derlen = -1; int ret=-1; s = DSA_SIG_new(); if (s == NULL) return(ret); - if (d2i_DSA_SIG(&s,&sigbuf,siglen) == NULL) goto err; + if (d2i_DSA_SIG(&s,&p,siglen) == NULL) goto err; + /* Ensure signature uses DER and doesn't have trailing garbage */ + derlen = i2d_DSA_SIG(s, &der); + if (derlen != siglen || memcmp(sigbuf, der, derlen)) + goto err; ret=DSA_do_verify(dgst,dgst_len,s,dsa); err: + if (derlen > 0) + { + OPENSSL_cleanse(der, derlen); + OPENSSL_free(der); + } DSA_SIG_free(s); return(ret); } diff --git a/main/openssl/crypto/dso/dso_dlfcn.c b/main/openssl/crypto/dso/dso_dlfcn.c index 4a56aace..faa9d76b 100644 --- a/main/openssl/crypto/dso/dso_dlfcn.c +++ b/main/openssl/crypto/dso/dso_dlfcn.c @@ -60,10 +60,8 @@ that handle _GNU_SOURCE and other similar macros. Defining it later is simply too late, because those headers are protected from re- inclusion. */ -#ifdef __linux -# ifndef _GNU_SOURCE -# define _GNU_SOURCE /* make sure dladdr is declared */ -# endif +#ifndef _GNU_SOURCE +# define _GNU_SOURCE /* make sure dladdr is declared */ #endif #include <stdio.h> diff --git a/main/openssl/crypto/ec/ec_lib.c b/main/openssl/crypto/ec/ec_lib.c index e2c4741b..0992c393 100644 --- a/main/openssl/crypto/ec/ec_lib.c +++ b/main/openssl/crypto/ec/ec_lib.c @@ -68,7 +68,7 @@ #include "ec_lcl.h" -static const char EC_version[] = "EC" OPENSSL_VERSION_PTEXT; +const char EC_version[] = "EC" OPENSSL_VERSION_PTEXT; /* functions for EC_GROUP objects */ diff --git a/main/openssl/crypto/ec/ec_mult.c b/main/openssl/crypto/ec/ec_mult.c index 19f21675..e81200b2 100644 --- a/main/openssl/crypto/ec/ec_mult.c +++ b/main/openssl/crypto/ec/ec_mult.c @@ -445,15 +445,16 @@ int ec_wNAF_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar, wNAF_len = OPENSSL_malloc(totalnum * sizeof wNAF_len[0]); wNAF = OPENSSL_malloc((totalnum + 1) * sizeof wNAF[0]); /* includes space for pivot */ val_sub = OPENSSL_malloc(totalnum * sizeof val_sub[0]); - + + /* Ensure wNAF is initialised in case we end up going to err */ + if (wNAF) wNAF[0] = NULL; /* preliminary pivot */ + if (!wsize || !wNAF_len || !wNAF || !val_sub) { ECerr(EC_F_EC_WNAF_MUL, ERR_R_MALLOC_FAILURE); goto err; } - wNAF[0] = NULL; /* preliminary pivot */ - /* num_val will be the total number of temporarily precomputed points */ num_val = 0; diff --git a/main/openssl/crypto/ec/ec_pmeth.c b/main/openssl/crypto/ec/ec_pmeth.c index 66ee397d..b62b532c 100644 --- a/main/openssl/crypto/ec/ec_pmeth.c +++ b/main/openssl/crypto/ec/ec_pmeth.c @@ -167,6 +167,7 @@ static int pkey_ec_verify(EVP_PKEY_CTX *ctx, return ret; } +#ifndef OPENSSL_NO_ECDH static int pkey_ec_derive(EVP_PKEY_CTX *ctx, unsigned char *key, size_t *keylen) { int ret; @@ -200,6 +201,7 @@ static int pkey_ec_derive(EVP_PKEY_CTX *ctx, unsigned char *key, size_t *keylen) *keylen = ret; return 1; } +#endif static int pkey_ec_ctrl(EVP_PKEY_CTX *ctx, int type, int p1, void *p2) { @@ -333,7 +335,11 @@ const EVP_PKEY_METHOD ec_pkey_meth = 0,0, 0, +#ifndef OPENSSL_NO_ECDH pkey_ec_derive, +#else + 0, +#endif pkey_ec_ctrl, pkey_ec_ctrl_str diff --git a/main/openssl/crypto/ec/ectest.c b/main/openssl/crypto/ec/ectest.c index d1bf9805..8e4154d8 100644 --- a/main/openssl/crypto/ec/ectest.c +++ b/main/openssl/crypto/ec/ectest.c @@ -1366,7 +1366,7 @@ static const struct nistp_test_params nistp_tests_params[] = }, }; -void nistp_single_test(const struct nistp_test_params *test) +static void nistp_single_test(const struct nistp_test_params *test) { BN_CTX *ctx; BIGNUM *p, *a, *b, *x, *y, *n, *m, *order; @@ -1469,7 +1469,7 @@ void nistp_single_test(const struct nistp_test_params *test) BN_CTX_free(ctx); } -void nistp_tests() +static void nistp_tests() { unsigned i; diff --git a/main/openssl/crypto/ecdsa/ecs_vrf.c b/main/openssl/crypto/ecdsa/ecs_vrf.c index ef9acf7b..188b9d57 100644 --- a/main/openssl/crypto/ecdsa/ecs_vrf.c +++ b/main/openssl/crypto/ecdsa/ecs_vrf.c @@ -57,6 +57,7 @@ */ #include "ecs_locl.h" +#include <string.h> #ifndef OPENSSL_NO_ENGINE #include <openssl/engine.h> #endif @@ -84,13 +85,25 @@ int ECDSA_verify(int type, const unsigned char *dgst, int dgst_len, const unsigned char *sigbuf, int sig_len, EC_KEY *eckey) { ECDSA_SIG *s; + const unsigned char *p = sigbuf; + unsigned char *der = NULL; + int derlen = -1; int ret=-1; s = ECDSA_SIG_new(); if (s == NULL) return(ret); - if (d2i_ECDSA_SIG(&s, &sigbuf, sig_len) == NULL) goto err; + if (d2i_ECDSA_SIG(&s, &p, sig_len) == NULL) goto err; + /* Ensure signature uses DER and doesn't have trailing garbage */ + derlen = i2d_ECDSA_SIG(s, &der); + if (derlen != sig_len || memcmp(sigbuf, der, derlen)) + goto err; ret=ECDSA_do_verify(dgst, dgst_len, s, eckey); err: + if (derlen > 0) + { + OPENSSL_cleanse(der, derlen); + OPENSSL_free(der); + } ECDSA_SIG_free(s); return(ret); } diff --git a/main/openssl/crypto/engine/eng_dyn.c b/main/openssl/crypto/engine/eng_dyn.c index 8fb8634e..b751ffd8 100644 --- a/main/openssl/crypto/engine/eng_dyn.c +++ b/main/openssl/crypto/engine/eng_dyn.c @@ -114,9 +114,6 @@ static const ENGINE_CMD_DEFN dynamic_cmd_defns[] = { ENGINE_CMD_FLAG_NO_INPUT}, {0, NULL, NULL, 0} }; -static const ENGINE_CMD_DEFN dynamic_cmd_defns_empty[] = { - {0, NULL, NULL, 0} - }; /* Loading code stores state inside the ENGINE structure via the "ex_data" * element. We load all our state into a single structure and use that as a diff --git a/main/openssl/crypto/evp/e_des3.c b/main/openssl/crypto/evp/e_des3.c index 8d7b7de2..7e1e8b30 100644 --- a/main/openssl/crypto/evp/e_des3.c +++ b/main/openssl/crypto/evp/e_des3.c @@ -124,12 +124,11 @@ static int des_ede_cbc_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out, #ifdef KSSL_DEBUG { int i; - char *cp; - printf("des_ede_cbc_cipher(ctx=%lx, buflen=%d)\n", ctx, ctx->buf_len); - printf("\t iv= "); + fprintf(stderr,"des_ede_cbc_cipher(ctx=%p, buflen=%d)\n", ctx, ctx->buf_len); + fprintf(stderr,"\t iv= "); for(i=0;i<8;i++) - printf("%02X",ctx->iv[i]); - printf("\n"); + fprintf(stderr,"%02X",ctx->iv[i]); + fprintf(stderr,"\n"); } #endif /* KSSL_DEBUG */ while (inl>=EVP_MAXCHUNK) @@ -260,11 +259,14 @@ static int des_ede3_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key, #ifdef KSSL_DEBUG { int i; - printf("des_ede3_init_key(ctx=%lx)\n", ctx); - printf("\tKEY= "); - for(i=0;i<24;i++) printf("%02X",key[i]); printf("\n"); - printf("\t IV= "); - for(i=0;i<8;i++) printf("%02X",iv[i]); printf("\n"); + fprintf(stderr,"des_ede3_init_key(ctx=%p)\n", ctx); + fprintf(stderr,"\tKEY= "); + for(i=0;i<24;i++) fprintf(stderr,"%02X",key[i]); fprintf(stderr,"\n"); + if (iv) + { + fprintf(stderr,"\t IV= "); + for(i=0;i<8;i++) fprintf(stderr,"%02X",iv[i]); fprintf(stderr,"\n"); + } } #endif /* KSSL_DEBUG */ diff --git a/main/openssl/crypto/evp/evp_enc.c b/main/openssl/crypto/evp/evp_enc.c index 7b1842a5..2e863ac5 100644 --- a/main/openssl/crypto/evp/evp_enc.c +++ b/main/openssl/crypto/evp/evp_enc.c @@ -67,7 +67,6 @@ #ifdef OPENSSL_FIPS #include <openssl/fips.h> #endif -#include "constant_time_locl.h" #include "evp_locl.h" #ifdef OPENSSL_FIPS @@ -501,21 +500,21 @@ int EVP_DecryptFinal(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) int EVP_DecryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) { - unsigned int i, b; - unsigned char pad, padding_good; + int i,n; + unsigned int b; *outl=0; if (ctx->cipher->flags & EVP_CIPH_FLAG_CUSTOM_CIPHER) { - int ret = M_do_cipher(ctx, out, NULL, 0); - if (ret < 0) + i = M_do_cipher(ctx, out, NULL, 0); + if (i < 0) return 0; else - *outl = ret; + *outl = i; return 1; } - b=(unsigned int)(ctx->cipher->block_size); + b=ctx->cipher->block_size; if (ctx->flags & EVP_CIPH_NO_PADDING) { if(ctx->buf_len) @@ -534,34 +533,33 @@ int EVP_DecryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl) return(0); } OPENSSL_assert(b <= sizeof ctx->final); - pad=ctx->final[b-1]; - - padding_good = (unsigned char)(~constant_time_is_zero_8(pad)); - padding_good &= constant_time_ge_8(b, pad); - - for (i = 1; i < b; ++i) - { - unsigned char is_pad_index = constant_time_lt_8(i, pad); - unsigned char pad_byte_good = constant_time_eq_8(ctx->final[b-i-1], pad); - padding_good &= constant_time_select_8(is_pad_index, pad_byte_good, 0xff); - } /* - * At least 1 byte is always padding, so we always write b - 1 - * bytes to avoid a timing leak. The caller is required to have |b| - * bytes space in |out| by the API contract. + * The following assumes that the ciphertext has been authenticated. + * Otherwise it provides a padding oracle. */ - for (i = 0; i < b - 1; ++i) - out[i] = ctx->final[i] & padding_good; - /* Safe cast: for a good padding, EVP_MAX_IV_LENGTH >= b >= pad */ - *outl = padding_good & ((unsigned char)(b - pad)); - return padding_good & 1; + n=ctx->final[b-1]; + if (n == 0 || n > (int)b) + { + EVPerr(EVP_F_EVP_DECRYPTFINAL_EX,EVP_R_BAD_DECRYPT); + return(0); + } + for (i=0; i<n; i++) + { + if (ctx->final[--b] != n) + { + EVPerr(EVP_F_EVP_DECRYPTFINAL_EX,EVP_R_BAD_DECRYPT); + return(0); + } + } + n=ctx->cipher->block_size-n; + for (i=0; i<n; i++) + out[i]=ctx->final[i]; + *outl=n; } else - { - *outl = 0; - return 1; - } + *outl=0; + return(1); } void EVP_CIPHER_CTX_free(EVP_CIPHER_CTX *ctx) diff --git a/main/openssl/crypto/md32_common.h b/main/openssl/crypto/md32_common.h index bb738195..a75f136c 100644 --- a/main/openssl/crypto/md32_common.h +++ b/main/openssl/crypto/md32_common.h @@ -225,8 +225,7 @@ #define HOST_c2l(c,l) (l =(((unsigned long)(*((c)++)))<<24), \ l|=(((unsigned long)(*((c)++)))<<16), \ l|=(((unsigned long)(*((c)++)))<< 8), \ - l|=(((unsigned long)(*((c)++))) ), \ - l) + l|=(((unsigned long)(*((c)++))) ) ) #endif #ifndef HOST_l2c #define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l)>>24)&0xff), \ @@ -262,8 +261,7 @@ #define HOST_c2l(c,l) (l =(((unsigned long)(*((c)++))) ), \ l|=(((unsigned long)(*((c)++)))<< 8), \ l|=(((unsigned long)(*((c)++)))<<16), \ - l|=(((unsigned long)(*((c)++)))<<24), \ - l) + l|=(((unsigned long)(*((c)++)))<<24) ) #endif #ifndef HOST_l2c #define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l) )&0xff), \ diff --git a/main/openssl/crypto/mem.c b/main/openssl/crypto/mem.c index 1cc62eaf..01052cfa 100644 --- a/main/openssl/crypto/mem.c +++ b/main/openssl/crypto/mem.c @@ -255,10 +255,12 @@ void *CRYPTO_malloc_locked(int num, const char *file, int line) if (num <= 0) return NULL; - allow_customize = 0; + if(allow_customize) + allow_customize = 0; if (malloc_debug_func != NULL) { - allow_customize_debug = 0; + if(allow_customize_debug) + allow_customize_debug = 0; malloc_debug_func(NULL, num, file, line, 0); } ret = malloc_locked_ex_func(num,file,line); @@ -299,10 +301,12 @@ void *CRYPTO_malloc(int num, const char *file, int line) if (num <= 0) return NULL; - allow_customize = 0; + if(allow_customize) + allow_customize = 0; if (malloc_debug_func != NULL) { - allow_customize_debug = 0; + if(allow_customize_debug) + allow_customize_debug = 0; malloc_debug_func(NULL, num, file, line, 0); } ret = malloc_ex_func(num,file,line); diff --git a/main/openssl/crypto/objects/obj_xref.h b/main/openssl/crypto/objects/obj_xref.h index e23938c2..33cb24b0 100644 --- a/main/openssl/crypto/objects/obj_xref.h +++ b/main/openssl/crypto/objects/obj_xref.h @@ -43,9 +43,6 @@ static const nid_triple sigoid_srt[] = static const nid_triple * const sigoid_srt_xref[] = { - &sigoid_srt[29], - &sigoid_srt[17], - &sigoid_srt[18], &sigoid_srt[0], &sigoid_srt[1], &sigoid_srt[7], diff --git a/main/openssl/crypto/objects/objxref.pl b/main/openssl/crypto/objects/objxref.pl index 731d3ae2..6c4c832a 100644 --- a/main/openssl/crypto/objects/objxref.pl +++ b/main/openssl/crypto/objects/objxref.pl @@ -90,7 +90,10 @@ EOF foreach (@srt2) { - my $x = $xref_tbl{$_}[2]; + my ($p1, $p2, $x) = @{$xref_tbl{$_}}; + # If digest or signature algorithm is "undef" then the algorithm + # needs special handling and is excluded from the cross reference table. + next if $p1 eq "undef" || $p2 eq "undef"; print "\t\&sigoid_srt\[$x\],\n"; } diff --git a/main/openssl/crypto/opensslv.h b/main/openssl/crypto/opensslv.h index f375967e..4a1df08f 100644 --- a/main/openssl/crypto/opensslv.h +++ b/main/openssl/crypto/opensslv.h @@ -29,11 +29,11 @@ extern "C" { * (Prior to 0.9.5a beta1, a different scheme was used: MMNNFFRBB for * major minor fix final patch/beta) */ -#define OPENSSL_VERSION_NUMBER 0x100010afL +#define OPENSSL_VERSION_NUMBER 0x100010cfL #ifdef OPENSSL_FIPS -#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1j-fips 15 Oct 2014" +#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1l-fips 15 Jan 2015" #else -#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1j 15 Oct 2014" +#define OPENSSL_VERSION_TEXT "OpenSSL 1.0.1l 15 Jan 2015" #endif #define OPENSSL_VERSION_PTEXT " part of " OPENSSL_VERSION_TEXT diff --git a/main/openssl/crypto/rand/rand_win.c b/main/openssl/crypto/rand/rand_win.c index 34ffcd23..c37c416b 100644 --- a/main/openssl/crypto/rand/rand_win.c +++ b/main/openssl/crypto/rand/rand_win.c @@ -196,12 +196,6 @@ int RAND_poll(void) DWORD w; int good = 0; - /* Determine the OS version we are on so we can turn off things - * that do not work properly. - */ - OSVERSIONINFO osverinfo ; - osverinfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFO) ; - GetVersionEx( &osverinfo ) ; #if defined(OPENSSL_SYS_WINCE) # if defined(_WIN32_WCE) && _WIN32_WCE>=300 @@ -281,56 +275,6 @@ int RAND_poll(void) * at random times on Windows 2000. Reported by Jeffrey Altman. * Only use it on NT. */ - /* Wolfgang Marczy <WMarczy@topcall.co.at> reports that - * the RegQueryValueEx call below can hang on NT4.0 (SP6). - * So we don't use this at all for now. */ -#if 0 - if ( osverinfo.dwPlatformId == VER_PLATFORM_WIN32_NT && - osverinfo.dwMajorVersion < 5) - { - /* Read Performance Statistics from NT/2000 registry - * The size of the performance data can vary from call - * to call so we must guess the size of the buffer to use - * and increase its size if we get an ERROR_MORE_DATA - * return instead of ERROR_SUCCESS. - */ - LONG rc=ERROR_MORE_DATA; - char * buf=NULL; - DWORD bufsz=0; - DWORD length; - - while (rc == ERROR_MORE_DATA) - { - buf = realloc(buf,bufsz+8192); - if (!buf) - break; - bufsz += 8192; - - length = bufsz; - rc = RegQueryValueEx(HKEY_PERFORMANCE_DATA, TEXT("Global"), - NULL, NULL, buf, &length); - } - if (rc == ERROR_SUCCESS) - { - /* For entropy count assume only least significant - * byte of each DWORD is random. - */ - RAND_add(&length, sizeof(length), 0); - RAND_add(buf, length, length / 4.0); - - /* Close the Registry Key to allow Windows to cleanup/close - * the open handle - * Note: The 'HKEY_PERFORMANCE_DATA' key is implicitly opened - * when the RegQueryValueEx above is done. However, if - * it is not explicitly closed, it can cause disk - * partition manipulation problems. - */ - RegCloseKey(HKEY_PERFORMANCE_DATA); - } - if (buf) - free(buf); - } -#endif if (advapi) { @@ -383,7 +327,7 @@ int RAND_poll(void) if (advapi) FreeLibrary(advapi); - if ((osverinfo.dwPlatformId != VER_PLATFORM_WIN32_NT || + if ((!check_winnt() || !OPENSSL_isservice()) && (user = LoadLibrary(TEXT("USER32.DLL")))) { @@ -407,8 +351,7 @@ int RAND_poll(void) * on NT4 even though it exists in SP3 (or SP6) and * higher. */ - if ( osverinfo.dwPlatformId == VER_PLATFORM_WIN32_NT && - osverinfo.dwMajorVersion < 5) + if (check_winnt() && !check_win_minplat(5)) cursor = 0; } if (cursor) diff --git a/main/openssl/crypto/sha/asm/sha1-mips.pl b/main/openssl/crypto/sha/asm/sha1-mips.pl index f1a702f3..197bc6b5 100644 --- a/main/openssl/crypto/sha/asm/sha1-mips.pl +++ b/main/openssl/crypto/sha/asm/sha1-mips.pl @@ -64,7 +64,7 @@ if ($flavour =~ /64|n32/i) { # ###################################################################### -$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0; +$big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC}); for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); } open STDOUT,">$output"; diff --git a/main/openssl/crypto/sha/asm/sha512-mips.pl b/main/openssl/crypto/sha/asm/sha512-mips.pl index ffa053bb..6807a2c7 100644 --- a/main/openssl/crypto/sha/asm/sha512-mips.pl +++ b/main/openssl/crypto/sha/asm/sha512-mips.pl @@ -68,7 +68,7 @@ $pf = ($flavour =~ /nubi/i) ? $t0 : $t2; # ###################################################################### -$big_endian=(`echo MIPSEL | $ENV{CC} -E -P -`=~/MIPSEL/)?1:0; +$big_endian=(`echo MIPSEL | $ENV{CC} -E -`=~/MIPSEL/)?1:0 if ($ENV{CC}); for (@ARGV) { $output=$_ if (/^\w[\w\-]*\.\w+$/); } open STDOUT,">$output"; diff --git a/main/openssl/crypto/x509/x509.h b/main/openssl/crypto/x509/x509.h index 092dd745..ed767f84 100644 --- a/main/openssl/crypto/x509/x509.h +++ b/main/openssl/crypto/x509/x509.h @@ -768,6 +768,7 @@ int X509_ALGOR_set0(X509_ALGOR *alg, ASN1_OBJECT *aobj, int ptype, void *pval); void X509_ALGOR_get0(ASN1_OBJECT **paobj, int *pptype, void **ppval, X509_ALGOR *algor); void X509_ALGOR_set_md(X509_ALGOR *alg, const EVP_MD *md); +int X509_ALGOR_cmp(const X509_ALGOR *a, const X509_ALGOR *b); X509_NAME *X509_NAME_dup(X509_NAME *xn); X509_NAME_ENTRY *X509_NAME_ENTRY_dup(X509_NAME_ENTRY *ne); diff --git a/main/openssl/crypto/x509/x509_vpm.c b/main/openssl/crypto/x509/x509_vpm.c index dfd89d89..ba546bd1 100644 --- a/main/openssl/crypto/x509/x509_vpm.c +++ b/main/openssl/crypto/x509/x509_vpm.c @@ -89,6 +89,8 @@ X509_VERIFY_PARAM *X509_VERIFY_PARAM_new(void) { X509_VERIFY_PARAM *param; param = OPENSSL_malloc(sizeof(X509_VERIFY_PARAM)); + if (!param) + return NULL; memset(param, 0, sizeof(X509_VERIFY_PARAM)); x509_verify_param_zero(param); return param; diff --git a/main/openssl/crypto/x509/x_all.c b/main/openssl/crypto/x509/x_all.c index e06602d6..fef55f89 100644 --- a/main/openssl/crypto/x509/x_all.c +++ b/main/openssl/crypto/x509/x_all.c @@ -72,6 +72,8 @@ int X509_verify(X509 *a, EVP_PKEY *r) { + if (X509_ALGOR_cmp(a->sig_alg, a->cert_info->signature)) + return 0; return(ASN1_item_verify(ASN1_ITEM_rptr(X509_CINF),a->sig_alg, a->signature,a->cert_info,r)); } diff --git a/main/openssl/crypto/x509v3/v3_ncons.c b/main/openssl/crypto/x509v3/v3_ncons.c index a01dc64d..3b0f1bd1 100644 --- a/main/openssl/crypto/x509v3/v3_ncons.c +++ b/main/openssl/crypto/x509v3/v3_ncons.c @@ -401,7 +401,7 @@ static int nc_dns(ASN1_IA5STRING *dns, ASN1_IA5STRING *base) if (dns->length > base->length) { dnsptr += dns->length - base->length; - if (dnsptr[-1] != '.') + if (*baseptr != '.' && dnsptr[-1] != '.') return X509_V_ERR_PERMITTED_VIOLATION; } |