diff options
Diffstat (limited to 'openssl/crypto/perlasm')
-rw-r--r-- | openssl/crypto/perlasm/cbc.pl | 349 | ||||
-rwxr-xr-x | openssl/crypto/perlasm/ppc-xlate.pl | 152 | ||||
-rw-r--r-- | openssl/crypto/perlasm/readme | 124 | ||||
-rwxr-xr-x | openssl/crypto/perlasm/x86_64-xlate.pl | 917 | ||||
-rw-r--r-- | openssl/crypto/perlasm/x86asm.pl | 207 | ||||
-rw-r--r-- | openssl/crypto/perlasm/x86gas.pl | 247 | ||||
-rw-r--r-- | openssl/crypto/perlasm/x86masm.pl | 184 | ||||
-rw-r--r-- | openssl/crypto/perlasm/x86nasm.pl | 166 |
8 files changed, 2346 insertions, 0 deletions
diff --git a/openssl/crypto/perlasm/cbc.pl b/openssl/crypto/perlasm/cbc.pl new file mode 100644 index 00000000..6fc25109 --- /dev/null +++ b/openssl/crypto/perlasm/cbc.pl @@ -0,0 +1,349 @@ +#!/usr/local/bin/perl + +# void des_ncbc_encrypt(input, output, length, schedule, ivec, enc) +# des_cblock (*input); +# des_cblock (*output); +# long length; +# des_key_schedule schedule; +# des_cblock (*ivec); +# int enc; +# +# calls +# des_encrypt((DES_LONG *)tin,schedule,DES_ENCRYPT); +# + +#&cbc("des_ncbc_encrypt","des_encrypt",0); +#&cbc("BF_cbc_encrypt","BF_encrypt","BF_encrypt", +# 1,4,5,3,5,-1); +#&cbc("des_ncbc_encrypt","des_encrypt","des_encrypt", +# 0,4,5,3,5,-1); +#&cbc("des_ede3_cbc_encrypt","des_encrypt3","des_decrypt3", +# 0,6,7,3,4,5); +# +# When doing a cipher that needs bigendian order, +# for encrypt, the iv is kept in bigendian form, +# while for decrypt, it is kept in little endian. +sub cbc + { + local($name,$enc_func,$dec_func,$swap,$iv_off,$enc_off,$p1,$p2,$p3)=@_; + # name is the function name + # enc_func and dec_func and the functions to call for encrypt/decrypt + # swap is true if byte order needs to be reversed + # iv_off is parameter number for the iv + # enc_off is parameter number for the encrypt/decrypt flag + # p1,p2,p3 are the offsets for parameters to be passed to the + # underlying calls. + + &function_begin_B($name,""); + &comment(""); + + $in="esi"; + $out="edi"; + $count="ebp"; + + &push("ebp"); + &push("ebx"); + &push("esi"); + &push("edi"); + + $data_off=4; + $data_off+=4 if ($p1 > 0); + $data_off+=4 if ($p2 > 0); + $data_off+=4 if ($p3 > 0); + + &mov($count, &wparam(2)); # length + + &comment("getting iv ptr from parameter $iv_off"); + &mov("ebx", &wparam($iv_off)); # Get iv ptr + + &mov($in, &DWP(0,"ebx","",0));# iv[0] + &mov($out, &DWP(4,"ebx","",0));# iv[1] + + &push($out); + &push($in); + &push($out); # used in decrypt for iv[1] + &push($in); # used in decrypt for iv[0] + + &mov("ebx", "esp"); # This is the address of tin[2] + + &mov($in, &wparam(0)); # in + &mov($out, &wparam(1)); # out + + # We have loaded them all, how lets push things + &comment("getting encrypt flag from parameter $enc_off"); + &mov("ecx", &wparam($enc_off)); # Get enc flag + if ($p3 > 0) + { + &comment("get and push parameter $p3"); + if ($enc_off != $p3) + { &mov("eax", &wparam($p3)); &push("eax"); } + else { &push("ecx"); } + } + if ($p2 > 0) + { + &comment("get and push parameter $p2"); + if ($enc_off != $p2) + { &mov("eax", &wparam($p2)); &push("eax"); } + else { &push("ecx"); } + } + if ($p1 > 0) + { + &comment("get and push parameter $p1"); + if ($enc_off != $p1) + { &mov("eax", &wparam($p1)); &push("eax"); } + else { &push("ecx"); } + } + &push("ebx"); # push data/iv + + &cmp("ecx",0); + &jz(&label("decrypt")); + + &and($count,0xfffffff8); + &mov("eax", &DWP($data_off,"esp","",0)); # load iv[0] + &mov("ebx", &DWP($data_off+4,"esp","",0)); # load iv[1] + + &jz(&label("encrypt_finish")); + + ############################################################# + + &set_label("encrypt_loop"); + # encrypt start + # "eax" and "ebx" hold iv (or the last cipher text) + + &mov("ecx", &DWP(0,$in,"",0)); # load first 4 bytes + &mov("edx", &DWP(4,$in,"",0)); # second 4 bytes + + &xor("eax", "ecx"); + &xor("ebx", "edx"); + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov(&DWP($data_off,"esp","",0), "eax"); # put in array for call + &mov(&DWP($data_off+4,"esp","",0), "ebx"); # + + &call($enc_func); + + &mov("eax", &DWP($data_off,"esp","",0)); + &mov("ebx", &DWP($data_off+4,"esp","",0)); + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov(&DWP(0,$out,"",0),"eax"); + &mov(&DWP(4,$out,"",0),"ebx"); + + # eax and ebx are the next iv. + + &add($in, 8); + &add($out, 8); + + &sub($count, 8); + &jnz(&label("encrypt_loop")); + +###################################################################3 + &set_label("encrypt_finish"); + &mov($count, &wparam(2)); # length + &and($count, 7); + &jz(&label("finish")); + &call(&label("PIC_point")); +&set_label("PIC_point"); + &blindpop("edx"); + &lea("ecx",&DWP(&label("cbc_enc_jmp_table")."-".&label("PIC_point"),"edx")); + &mov($count,&DWP(0,"ecx",$count,4)) + &add($count,"edx"); + &xor("ecx","ecx"); + &xor("edx","edx"); + #&mov($count,&DWP(&label("cbc_enc_jmp_table"),"",$count,4)); + &jmp_ptr($count); + +&set_label("ej7"); + &movb(&HB("edx"), &BP(6,$in,"",0)); + &shl("edx",8); +&set_label("ej6"); + &movb(&HB("edx"), &BP(5,$in,"",0)); +&set_label("ej5"); + &movb(&LB("edx"), &BP(4,$in,"",0)); +&set_label("ej4"); + &mov("ecx", &DWP(0,$in,"",0)); + &jmp(&label("ejend")); +&set_label("ej3"); + &movb(&HB("ecx"), &BP(2,$in,"",0)); + &shl("ecx",8); +&set_label("ej2"); + &movb(&HB("ecx"), &BP(1,$in,"",0)); +&set_label("ej1"); + &movb(&LB("ecx"), &BP(0,$in,"",0)); +&set_label("ejend"); + + &xor("eax", "ecx"); + &xor("ebx", "edx"); + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov(&DWP($data_off,"esp","",0), "eax"); # put in array for call + &mov(&DWP($data_off+4,"esp","",0), "ebx"); # + + &call($enc_func); + + &mov("eax", &DWP($data_off,"esp","",0)); + &mov("ebx", &DWP($data_off+4,"esp","",0)); + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov(&DWP(0,$out,"",0),"eax"); + &mov(&DWP(4,$out,"",0),"ebx"); + + &jmp(&label("finish")); + + ############################################################# + ############################################################# + &set_label("decrypt",1); + # decrypt start + &and($count,0xfffffff8); + # The next 2 instructions are only for if the jz is taken + &mov("eax", &DWP($data_off+8,"esp","",0)); # get iv[0] + &mov("ebx", &DWP($data_off+12,"esp","",0)); # get iv[1] + &jz(&label("decrypt_finish")); + + &set_label("decrypt_loop"); + &mov("eax", &DWP(0,$in,"",0)); # load first 4 bytes + &mov("ebx", &DWP(4,$in,"",0)); # second 4 bytes + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov(&DWP($data_off,"esp","",0), "eax"); # put back + &mov(&DWP($data_off+4,"esp","",0), "ebx"); # + + &call($dec_func); + + &mov("eax", &DWP($data_off,"esp","",0)); # get return + &mov("ebx", &DWP($data_off+4,"esp","",0)); # + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov("ecx", &DWP($data_off+8,"esp","",0)); # get iv[0] + &mov("edx", &DWP($data_off+12,"esp","",0)); # get iv[1] + + &xor("ecx", "eax"); + &xor("edx", "ebx"); + + &mov("eax", &DWP(0,$in,"",0)); # get old cipher text, + &mov("ebx", &DWP(4,$in,"",0)); # next iv actually + + &mov(&DWP(0,$out,"",0),"ecx"); + &mov(&DWP(4,$out,"",0),"edx"); + + &mov(&DWP($data_off+8,"esp","",0), "eax"); # save iv + &mov(&DWP($data_off+12,"esp","",0), "ebx"); # + + &add($in, 8); + &add($out, 8); + + &sub($count, 8); + &jnz(&label("decrypt_loop")); +############################ ENDIT #######################3 + &set_label("decrypt_finish"); + &mov($count, &wparam(2)); # length + &and($count, 7); + &jz(&label("finish")); + + &mov("eax", &DWP(0,$in,"",0)); # load first 4 bytes + &mov("ebx", &DWP(4,$in,"",0)); # second 4 bytes + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov(&DWP($data_off,"esp","",0), "eax"); # put back + &mov(&DWP($data_off+4,"esp","",0), "ebx"); # + + &call($dec_func); + + &mov("eax", &DWP($data_off,"esp","",0)); # get return + &mov("ebx", &DWP($data_off+4,"esp","",0)); # + + &bswap("eax") if $swap; + &bswap("ebx") if $swap; + + &mov("ecx", &DWP($data_off+8,"esp","",0)); # get iv[0] + &mov("edx", &DWP($data_off+12,"esp","",0)); # get iv[1] + + &xor("ecx", "eax"); + &xor("edx", "ebx"); + + # this is for when we exit + &mov("eax", &DWP(0,$in,"",0)); # get old cipher text, + &mov("ebx", &DWP(4,$in,"",0)); # next iv actually + +&set_label("dj7"); + &rotr("edx", 16); + &movb(&BP(6,$out,"",0), &LB("edx")); + &shr("edx",16); +&set_label("dj6"); + &movb(&BP(5,$out,"",0), &HB("edx")); +&set_label("dj5"); + &movb(&BP(4,$out,"",0), &LB("edx")); +&set_label("dj4"); + &mov(&DWP(0,$out,"",0), "ecx"); + &jmp(&label("djend")); +&set_label("dj3"); + &rotr("ecx", 16); + &movb(&BP(2,$out,"",0), &LB("ecx")); + &shl("ecx",16); +&set_label("dj2"); + &movb(&BP(1,$in,"",0), &HB("ecx")); +&set_label("dj1"); + &movb(&BP(0,$in,"",0), &LB("ecx")); +&set_label("djend"); + + # final iv is still in eax:ebx + &jmp(&label("finish")); + + +############################ FINISH #######################3 + &set_label("finish",1); + &mov("ecx", &wparam($iv_off)); # Get iv ptr + + ################################################# + $total=16+4; + $total+=4 if ($p1 > 0); + $total+=4 if ($p2 > 0); + $total+=4 if ($p3 > 0); + &add("esp",$total); + + &mov(&DWP(0,"ecx","",0), "eax"); # save iv + &mov(&DWP(4,"ecx","",0), "ebx"); # save iv + + &function_end_A($name); + + &align(64); + &set_label("cbc_enc_jmp_table"); + &data_word("0"); + &data_word(&label("ej1")."-".&label("PIC_point")); + &data_word(&label("ej2")."-".&label("PIC_point")); + &data_word(&label("ej3")."-".&label("PIC_point")); + &data_word(&label("ej4")."-".&label("PIC_point")); + &data_word(&label("ej5")."-".&label("PIC_point")); + &data_word(&label("ej6")."-".&label("PIC_point")); + &data_word(&label("ej7")."-".&label("PIC_point")); + # not used + #&set_label("cbc_dec_jmp_table",1); + #&data_word("0"); + #&data_word(&label("dj1")."-".&label("PIC_point")); + #&data_word(&label("dj2")."-".&label("PIC_point")); + #&data_word(&label("dj3")."-".&label("PIC_point")); + #&data_word(&label("dj4")."-".&label("PIC_point")); + #&data_word(&label("dj5")."-".&label("PIC_point")); + #&data_word(&label("dj6")."-".&label("PIC_point")); + #&data_word(&label("dj7")."-".&label("PIC_point")); + &align(64); + + &function_end_B($name); + + } + +1; diff --git a/openssl/crypto/perlasm/ppc-xlate.pl b/openssl/crypto/perlasm/ppc-xlate.pl new file mode 100755 index 00000000..4579671c --- /dev/null +++ b/openssl/crypto/perlasm/ppc-xlate.pl @@ -0,0 +1,152 @@ +#!/usr/bin/env perl + +# PowerPC assembler distiller by <appro>. + +my $flavour = shift; +my $output = shift; +open STDOUT,">$output" || die "can't open $output: $!"; + +my %GLOBALS; +my $dotinlocallabels=($flavour=~/linux/)?1:0; + +################################################################ +# directives which need special treatment on different platforms +################################################################ +my $globl = sub { + my $junk = shift; + my $name = shift; + my $global = \$GLOBALS{$name}; + my $ret; + + $name =~ s|^[\.\_]||; + + SWITCH: for ($flavour) { + /aix/ && do { $name = ".$name"; + last; + }; + /osx/ && do { $name = "_$name"; + last; + }; + /linux.*32/ && do { $ret .= ".globl $name\n"; + $ret .= ".type $name,\@function"; + last; + }; + /linux.*64/ && do { $ret .= ".globl .$name\n"; + $ret .= ".type .$name,\@function\n"; + $ret .= ".section \".opd\",\"aw\"\n"; + $ret .= ".globl $name\n"; + $ret .= ".align 3\n"; + $ret .= "$name:\n"; + $ret .= ".quad .$name,.TOC.\@tocbase,0\n"; + $ret .= ".size $name,24\n"; + $ret .= ".previous\n"; + + $name = ".$name"; + last; + }; + } + + $ret = ".globl $name" if (!$ret); + $$global = $name; + $ret; +}; +my $text = sub { + ($flavour =~ /aix/) ? ".csect" : ".text"; +}; +my $machine = sub { + my $junk = shift; + my $arch = shift; + if ($flavour =~ /osx/) + { $arch =~ s/\"//g; + $arch = ($flavour=~/64/) ? "ppc970-64" : "ppc970" if ($arch eq "any"); + } + ".machine $arch"; +}; +my $asciz = sub { + shift; + my $line = join(",",@_); + if ($line =~ /^"(.*)"$/) + { ".byte " . join(",",unpack("C*",$1),0) . "\n.align 2"; } + else + { ""; } +}; + +################################################################ +# simplified mnemonics not handled by at least one assembler +################################################################ +my $cmplw = sub { + my $f = shift; + my $cr = 0; $cr = shift if ($#_>1); + # Some out-of-date 32-bit GNU assembler just can't handle cmplw... + ($flavour =~ /linux.*32/) ? + " .long ".sprintf "0x%x",31<<26|$cr<<23|$_[0]<<16|$_[1]<<11|64 : + " cmplw ".join(',',$cr,@_); +}; +my $bdnz = sub { + my $f = shift; + my $bo = $f=~/[\+\-]/ ? 16+9 : 16; # optional "to be taken" hint + " bc $bo,0,".shift; +} if ($flavour!~/linux/); +my $bltlr = sub { + my $f = shift; + my $bo = $f=~/\-/ ? 12+2 : 12; # optional "not to be taken" hint + ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints + " .long ".sprintf "0x%x",19<<26|$bo<<21|16<<1 : + " bclr $bo,0"; +}; +my $bnelr = sub { + my $f = shift; + my $bo = $f=~/\-/ ? 4+2 : 4; # optional "not to be taken" hint + ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints + " .long ".sprintf "0x%x",19<<26|$bo<<21|2<<16|16<<1 : + " bclr $bo,2"; +}; +my $beqlr = sub { + my $f = shift; + my $bo = $f=~/-/ ? 12+2 : 12; # optional "not to be taken" hint + ($flavour =~ /linux/) ? # GNU as doesn't allow most recent hints + " .long ".sprintf "0x%X",19<<26|$bo<<21|2<<16|16<<1 : + " bclr $bo,2"; +}; +# GNU assembler can't handle extrdi rA,rS,16,48, or when sum of last two +# arguments is 64, with "operand out of range" error. +my $extrdi = sub { + my ($f,$ra,$rs,$n,$b) = @_; + $b = ($b+$n)&63; $n = 64-$n; + " rldicl $ra,$rs,$b,$n"; +}; + +while($line=<>) { + + $line =~ s|[#!;].*$||; # get rid of asm-style comments... + $line =~ s|/\*.*\*/||; # ... and C-style comments... + $line =~ s|^\s+||; # ... and skip white spaces in beginning... + $line =~ s|\s+$||; # ... and at the end + + { + $line =~ s|\b\.L(\w+)|L$1|g; # common denominator for Locallabel + $line =~ s|\bL(\w+)|\.L$1|g if ($dotinlocallabels); + } + + { + $line =~ s|(^[\.\w]+)\:\s*||; + my $label = $1; + printf "%s:",($GLOBALS{$label} or $label) if ($label); + } + + { + $line =~ s|^\s*(\.?)(\w+)([\.\+\-]?)\s*||; + my $c = $1; $c = "\t" if ($c eq ""); + my $mnemonic = $2; + my $f = $3; + my $opcode = eval("\$$mnemonic"); + $line =~ s|\bc?[rf]([0-9]+)\b|$1|g if ($c ne "." and $flavour !~ /osx/); + if (ref($opcode) eq 'CODE') { $line = &$opcode($f,split(',',$line)); } + elsif ($mnemonic) { $line = $c.$mnemonic.$f."\t".$line; } + } + + print $line if ($line); + print "\n"; +} + +close STDOUT; diff --git a/openssl/crypto/perlasm/readme b/openssl/crypto/perlasm/readme new file mode 100644 index 00000000..f02bbee7 --- /dev/null +++ b/openssl/crypto/perlasm/readme @@ -0,0 +1,124 @@ +The perl scripts in this directory are my 'hack' to generate +multiple different assembler formats via the one origional script. + +The way to use this library is to start with adding the path to this directory +and then include it. + +push(@INC,"perlasm","../../perlasm"); +require "x86asm.pl"; + +The first thing we do is setup the file and type of assember + +&asm_init($ARGV[0],$0); + +The first argument is the 'type'. Currently +'cpp', 'sol', 'a.out', 'elf' or 'win32'. +Argument 2 is the file name. + +The reciprocal function is +&asm_finish() which should be called at the end. + +There are 2 main 'packages'. x86ms.pl, which is the microsoft assembler, +and x86unix.pl which is the unix (gas) version. + +Functions of interest are: +&external_label("des_SPtrans"); declare and external variable +&LB(reg); Low byte for a register +&HB(reg); High byte for a register +&BP(off,base,index,scale) Byte pointer addressing +&DWP(off,base,index,scale) Word pointer addressing +&stack_push(num) Basically a 'sub esp, num*4' with extra +&stack_pop(num) inverse of stack_push +&function_begin(name,extra) Start a function with pushing of + edi, esi, ebx and ebp. extra is extra win32 + external info that may be required. +&function_begin_B(name,extra) Same as norma function_begin but no pushing. +&function_end(name) Call at end of function. +&function_end_A(name) Standard pop and ret, for use inside functions +&function_end_B(name) Call at end but with poping or 'ret'. +&swtmp(num) Address on stack temp word. +&wparam(num) Parameter number num, that was push + in C convention. This all works over pushes + and pops. +&comment("hello there") Put in a comment. +&label("loop") Refer to a label, normally a jmp target. +&set_label("loop") Set a label at this point. +&data_word(word) Put in a word of data. + +So how does this all hold together? Given + +int calc(int len, int *data) + { + int i,j=0; + + for (i=0; i<len; i++) + { + j+=other(data[i]); + } + } + +So a very simple version of this function could be coded as + + push(@INC,"perlasm","../../perlasm"); + require "x86asm.pl"; + + &asm_init($ARGV[0],"cacl.pl"); + + &external_label("other"); + + $tmp1= "eax"; + $j= "edi"; + $data= "esi"; + $i= "ebp"; + + &comment("a simple function"); + &function_begin("calc"); + &mov( $data, &wparam(1)); # data + &xor( $j, $j); + &xor( $i, $i); + + &set_label("loop"); + &cmp( $i, &wparam(0)); + &jge( &label("end")); + + &mov( $tmp1, &DWP(0,$data,$i,4)); + &push( $tmp1); + &call( "other"); + &add( $j, "eax"); + &pop( $tmp1); + &inc( $i); + &jmp( &label("loop")); + + &set_label("end"); + &mov( "eax", $j); + + &function_end("calc"); + + &asm_finish(); + +The above example is very very unoptimised but gives an idea of how +things work. + +There is also a cbc mode function generator in cbc.pl + +&cbc( $name, + $encrypt_function_name, + $decrypt_function_name, + $true_if_byte_swap_needed, + $parameter_number_for_iv, + $parameter_number_for_encrypt_flag, + $first_parameter_to_pass, + $second_parameter_to_pass, + $third_parameter_to_pass); + +So for example, given +void BF_encrypt(BF_LONG *data,BF_KEY *key); +void BF_decrypt(BF_LONG *data,BF_KEY *key); +void BF_cbc_encrypt(unsigned char *in, unsigned char *out, long length, + BF_KEY *ks, unsigned char *iv, int enc); + +&cbc("BF_cbc_encrypt","BF_encrypt","BF_encrypt",1,4,5,3,-1,-1); + +&cbc("des_ncbc_encrypt","des_encrypt","des_encrypt",0,4,5,3,5,-1); +&cbc("des_ede3_cbc_encrypt","des_encrypt3","des_decrypt3",0,6,7,3,4,5); + diff --git a/openssl/crypto/perlasm/x86_64-xlate.pl b/openssl/crypto/perlasm/x86_64-xlate.pl new file mode 100755 index 00000000..e47116b7 --- /dev/null +++ b/openssl/crypto/perlasm/x86_64-xlate.pl @@ -0,0 +1,917 @@ +#!/usr/bin/env perl + +# Ascetic x86_64 AT&T to MASM/NASM assembler translator by <appro>. +# +# Why AT&T to MASM and not vice versa? Several reasons. Because AT&T +# format is way easier to parse. Because it's simpler to "gear" from +# Unix ABI to Windows one [see cross-reference "card" at the end of +# file]. Because Linux targets were available first... +# +# In addition the script also "distills" code suitable for GNU +# assembler, so that it can be compiled with more rigid assemblers, +# such as Solaris /usr/ccs/bin/as. +# +# This translator is not designed to convert *arbitrary* assembler +# code from AT&T format to MASM one. It's designed to convert just +# enough to provide for dual-ABI OpenSSL modules development... +# There *are* limitations and you might have to modify your assembler +# code or this script to achieve the desired result... +# +# Currently recognized limitations: +# +# - can't use multiple ops per line; +# +# Dual-ABI styling rules. +# +# 1. Adhere to Unix register and stack layout [see cross-reference +# ABI "card" at the end for explanation]. +# 2. Forget about "red zone," stick to more traditional blended +# stack frame allocation. If volatile storage is actually required +# that is. If not, just leave the stack as is. +# 3. Functions tagged with ".type name,@function" get crafted with +# unified Win64 prologue and epilogue automatically. If you want +# to take care of ABI differences yourself, tag functions as +# ".type name,@abi-omnipotent" instead. +# 4. To optimize the Win64 prologue you can specify number of input +# arguments as ".type name,@function,N." Keep in mind that if N is +# larger than 6, then you *have to* write "abi-omnipotent" code, +# because >6 cases can't be addressed with unified prologue. +# 5. Name local labels as .L*, do *not* use dynamic labels such as 1: +# (sorry about latter). +# 6. Don't use [or hand-code with .byte] "rep ret." "ret" mnemonic is +# required to identify the spots, where to inject Win64 epilogue! +# But on the pros, it's then prefixed with rep automatically:-) +# 7. Stick to explicit ip-relative addressing. If you have to use +# GOTPCREL addressing, stick to mov symbol@GOTPCREL(%rip),%r??. +# Both are recognized and translated to proper Win64 addressing +# modes. To support legacy code a synthetic directive, .picmeup, +# is implemented. It puts address of the *next* instruction into +# target register, e.g.: +# +# .picmeup %rax +# lea .Label-.(%rax),%rax +# +# 8. In order to provide for structured exception handling unified +# Win64 prologue copies %rsp value to %rax. For further details +# see SEH paragraph at the end. +# 9. .init segment is allowed to contain calls to functions only. +# a. If function accepts more than 4 arguments *and* >4th argument +# is declared as non 64-bit value, do clear its upper part. + +my $flavour = shift; +my $output = shift; +if ($flavour =~ /\./) { $output = $flavour; undef $flavour; } + +{ my ($stddev,$stdino,@junk)=stat(STDOUT); + my ($outdev,$outino,@junk)=stat($output); + + open STDOUT,">$output" || die "can't open $output: $!" + if ($stddev!=$outdev || $stdino!=$outino); +} + +my $gas=1; $gas=0 if ($output =~ /\.asm$/); +my $elf=1; $elf=0 if (!$gas); +my $win64=0; +my $prefix=""; +my $decor=".L"; + +my $masmref=8 + 50727*2**-32; # 8.00.50727 shipped with VS2005 +my $masm=0; +my $PTR=" PTR"; + +my $nasmref=2.03; +my $nasm=0; + +if ($flavour eq "mingw64") { $gas=1; $elf=0; $win64=1; + $prefix=`echo __USER_LABEL_PREFIX__ | $ENV{CC} -E -P -`; + chomp($prefix); + } +elsif ($flavour eq "macosx") { $gas=1; $elf=0; $prefix="_"; $decor="L\$"; } +elsif ($flavour eq "masm") { $gas=0; $elf=0; $masm=$masmref; $win64=1; $decor="\$L\$"; } +elsif ($flavour eq "nasm") { $gas=0; $elf=0; $nasm=$nasmref; $win64=1; $decor="\$L\$"; $PTR=""; } +elsif (!$gas) +{ if ($ENV{ASM} =~ m/nasm/ && `nasm -v` =~ m/version ([0-9]+)\.([0-9]+)/i) + { $nasm = $1 + $2*0.01; $PTR=""; } + elsif (`ml64 2>&1` =~ m/Version ([0-9]+)\.([0-9]+)(\.([0-9]+))?/) + { $masm = $1 + $2*2**-16 + $4*2**-32; } + die "no assembler found on %PATH" if (!($nasm || $masm)); + $win64=1; + $elf=0; + $decor="\$L\$"; +} + +my $current_segment; +my $current_function; +my %globals; + +{ package opcode; # pick up opcodes + sub re { + my $self = shift; # single instance in enough... + local *line = shift; + undef $ret; + + if ($line =~ /^([a-z][a-z0-9]*)/i) { + $self->{op} = $1; + $ret = $self; + $line = substr($line,@+[0]); $line =~ s/^\s+//; + + undef $self->{sz}; + if ($self->{op} =~ /^(movz)b.*/) { # movz is pain... + $self->{op} = $1; + $self->{sz} = "b"; + } elsif ($self->{op} =~ /call|jmp/) { + $self->{sz} = ""; + } elsif ($self->{op} =~ /^p/ && $' !~ /^(ush|op)/) { # SSEn + $self->{sz} = ""; + } elsif ($self->{op} =~ /([a-z]{3,})([qlwb])$/) { + $self->{op} = $1; + $self->{sz} = $2; + } + } + $ret; + } + sub size { + my $self = shift; + my $sz = shift; + $self->{sz} = $sz if (defined($sz) && !defined($self->{sz})); + $self->{sz}; + } + sub out { + my $self = shift; + if ($gas) { + if ($self->{op} eq "movz") { # movz is pain... + sprintf "%s%s%s",$self->{op},$self->{sz},shift; + } elsif ($self->{op} =~ /^set/) { + "$self->{op}"; + } elsif ($self->{op} eq "ret") { + my $epilogue = ""; + if ($win64 && $current_function->{abi} eq "svr4") { + $epilogue = "movq 8(%rsp),%rdi\n\t" . + "movq 16(%rsp),%rsi\n\t"; + } + $epilogue . ".byte 0xf3,0xc3"; + } elsif ($self->{op} eq "call" && !$elf && $current_segment eq ".init") { + ".p2align\t3\n\t.quad"; + } else { + "$self->{op}$self->{sz}"; + } + } else { + $self->{op} =~ s/^movz/movzx/; + if ($self->{op} eq "ret") { + $self->{op} = ""; + if ($win64 && $current_function->{abi} eq "svr4") { + $self->{op} = "mov rdi,QWORD${PTR}[8+rsp]\t;WIN64 epilogue\n\t". + "mov rsi,QWORD${PTR}[16+rsp]\n\t"; + } + $self->{op} .= "DB\t0F3h,0C3h\t\t;repret"; + } elsif ($self->{op} =~ /^(pop|push)f/) { + $self->{op} .= $self->{sz}; + } elsif ($self->{op} eq "call" && $current_segment eq ".CRT\$XCU") { + $self->{op} = "\tDQ"; + } + $self->{op}; + } + } + sub mnemonic { + my $self=shift; + my $op=shift; + $self->{op}=$op if (defined($op)); + $self->{op}; + } +} +{ package const; # pick up constants, which start with $ + sub re { + my $self = shift; # single instance in enough... + local *line = shift; + undef $ret; + + if ($line =~ /^\$([^,]+)/) { + $self->{value} = $1; + $ret = $self; + $line = substr($line,@+[0]); $line =~ s/^\s+//; + } + $ret; + } + sub out { + my $self = shift; + + if ($gas) { + # Solaris /usr/ccs/bin/as can't handle multiplications + # in $self->{value} + $self->{value} =~ s/(?<![\w\$\.])(0x?[0-9a-f]+)/oct($1)/egi; + $self->{value} =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg; + sprintf "\$%s",$self->{value}; + } else { + $self->{value} =~ s/(0b[0-1]+)/oct($1)/eig; + $self->{value} =~ s/0x([0-9a-f]+)/0$1h/ig if ($masm); + sprintf "%s",$self->{value}; + } + } +} +{ package ea; # pick up effective addresses: expr(%reg,%reg,scale) + sub re { + my $self = shift; # single instance in enough... + local *line = shift; + undef $ret; + + # optional * ---vvv--- appears in indirect jmp/call + if ($line =~ /^(\*?)([^\(,]*)\(([%\w,]+)\)/) { + $self->{asterisk} = $1; + $self->{label} = $2; + ($self->{base},$self->{index},$self->{scale})=split(/,/,$3); + $self->{scale} = 1 if (!defined($self->{scale})); + $ret = $self; + $line = substr($line,@+[0]); $line =~ s/^\s+//; + + if ($win64 && $self->{label} =~ s/\@GOTPCREL//) { + die if (opcode->mnemonic() ne "mov"); + opcode->mnemonic("lea"); + } + $self->{base} =~ s/^%//; + $self->{index} =~ s/^%// if (defined($self->{index})); + } + $ret; + } + sub size {} + sub out { + my $self = shift; + my $sz = shift; + + $self->{label} =~ s/([_a-z][_a-z0-9]*)/$globals{$1} or $1/gei; + $self->{label} =~ s/\.L/$decor/g; + + # Silently convert all EAs to 64-bit. This is required for + # elder GNU assembler and results in more compact code, + # *but* most importantly AES module depends on this feature! + $self->{index} =~ s/^[er](.?[0-9xpi])[d]?$/r\1/; + $self->{base} =~ s/^[er](.?[0-9xpi])[d]?$/r\1/; + + if ($gas) { + # Solaris /usr/ccs/bin/as can't handle multiplications + # in $self->{label}, new gas requires sign extension... + use integer; + $self->{label} =~ s/(?<![\w\$\.])(0x?[0-9a-f]+)/oct($1)/egi; + $self->{label} =~ s/([0-9]+\s*[\*\/\%]\s*[0-9]+)/eval($1)/eg; + $self->{label} =~ s/([0-9]+)/$1<<32>>32/eg; + $self->{label} =~ s/^___imp_/__imp__/ if ($flavour eq "mingw64"); + + if (defined($self->{index})) { + sprintf "%s%s(%%%s,%%%s,%d)",$self->{asterisk}, + $self->{label},$self->{base}, + $self->{index},$self->{scale}; + } else { + sprintf "%s%s(%%%s)", $self->{asterisk},$self->{label},$self->{base}; + } + } else { + %szmap = ( b=>"BYTE$PTR", w=>"WORD$PTR", l=>"DWORD$PTR", q=>"QWORD$PTR" ); + + $self->{label} =~ s/\./\$/g; + $self->{label} =~ s/(?<![\w\$\.])0x([0-9a-f]+)/0$1h/ig; + $self->{label} = "($self->{label})" if ($self->{label} =~ /[\*\+\-\/]/); + $sz="q" if ($self->{asterisk}); + + if (defined($self->{index})) { + sprintf "%s[%s%s*%d+%s]",$szmap{$sz}, + $self->{label}?"$self->{label}+":"", + $self->{index},$self->{scale}, + $self->{base}; + } elsif ($self->{base} eq "rip") { + sprintf "%s[%s]",$szmap{$sz},$self->{label}; + } else { + sprintf "%s[%s%s]",$szmap{$sz}, + $self->{label}?"$self->{label}+":"", + $self->{base}; + } + } + } +} +{ package register; # pick up registers, which start with %. + sub re { + my $class = shift; # muliple instances... + my $self = {}; + local *line = shift; + undef $ret; + + # optional * ---vvv--- appears in indirect jmp/call + if ($line =~ /^(\*?)%(\w+)/) { + bless $self,$class; + $self->{asterisk} = $1; + $self->{value} = $2; + $ret = $self; + $line = substr($line,@+[0]); $line =~ s/^\s+//; + } + $ret; + } + sub size { + my $self = shift; + undef $ret; + + if ($self->{value} =~ /^r[\d]+b$/i) { $ret="b"; } + elsif ($self->{value} =~ /^r[\d]+w$/i) { $ret="w"; } + elsif ($self->{value} =~ /^r[\d]+d$/i) { $ret="l"; } + elsif ($self->{value} =~ /^r[\w]+$/i) { $ret="q"; } + elsif ($self->{value} =~ /^[a-d][hl]$/i){ $ret="b"; } + elsif ($self->{value} =~ /^[\w]{2}l$/i) { $ret="b"; } + elsif ($self->{value} =~ /^[\w]{2}$/i) { $ret="w"; } + elsif ($self->{value} =~ /^e[a-z]{2}$/i){ $ret="l"; } + + $ret; + } + sub out { + my $self = shift; + if ($gas) { sprintf "%s%%%s",$self->{asterisk},$self->{value}; } + else { $self->{value}; } + } +} +{ package label; # pick up labels, which end with : + sub re { + my $self = shift; # single instance is enough... + local *line = shift; + undef $ret; + + if ($line =~ /(^[\.\w]+)\:/) { + $self->{value} = $1; + $ret = $self; + $line = substr($line,@+[0]); $line =~ s/^\s+//; + + $self->{value} =~ s/^\.L/$decor/; + } + $ret; + } + sub out { + my $self = shift; + + if ($gas) { + my $func = ($globals{$self->{value}} or $self->{value}) . ":"; + if ($win64 && + $current_function->{name} eq $self->{value} && + $current_function->{abi} eq "svr4") { + $func .= "\n"; + $func .= " movq %rdi,8(%rsp)\n"; + $func .= " movq %rsi,16(%rsp)\n"; + $func .= " movq %rsp,%rax\n"; + $func .= "${decor}SEH_begin_$current_function->{name}:\n"; + my $narg = $current_function->{narg}; + $narg=6 if (!defined($narg)); + $func .= " movq %rcx,%rdi\n" if ($narg>0); + $func .= " movq %rdx,%rsi\n" if ($narg>1); + $func .= " movq %r8,%rdx\n" if ($narg>2); + $func .= " movq %r9,%rcx\n" if ($narg>3); + $func .= " movq 40(%rsp),%r8\n" if ($narg>4); + $func .= " movq 48(%rsp),%r9\n" if ($narg>5); + } + $func; + } elsif ($self->{value} ne "$current_function->{name}") { + $self->{value} .= ":" if ($masm && $ret!~m/^\$/); + $self->{value} . ":"; + } elsif ($win64 && $current_function->{abi} eq "svr4") { + my $func = "$current_function->{name}" . + ($nasm ? ":" : "\tPROC $current_function->{scope}") . + "\n"; + $func .= " mov QWORD${PTR}[8+rsp],rdi\t;WIN64 prologue\n"; + $func .= " mov QWORD${PTR}[16+rsp],rsi\n"; + $func .= " mov rax,rsp\n"; + $func .= "${decor}SEH_begin_$current_function->{name}:"; + $func .= ":" if ($masm); + $func .= "\n"; + my $narg = $current_function->{narg}; + $narg=6 if (!defined($narg)); + $func .= " mov rdi,rcx\n" if ($narg>0); + $func .= " mov rsi,rdx\n" if ($narg>1); + $func .= " mov rdx,r8\n" if ($narg>2); + $func .= " mov rcx,r9\n" if ($narg>3); + $func .= " mov r8,QWORD${PTR}[40+rsp]\n" if ($narg>4); + $func .= " mov r9,QWORD${PTR}[48+rsp]\n" if ($narg>5); + $func .= "\n"; + } else { + "$current_function->{name}". + ($nasm ? ":" : "\tPROC $current_function->{scope}"); + } + } +} +{ package expr; # pick up expressioins + sub re { + my $self = shift; # single instance is enough... + local *line = shift; + undef $ret; + + if ($line =~ /(^[^,]+)/) { + $self->{value} = $1; + $ret = $self; + $line = substr($line,@+[0]); $line =~ s/^\s+//; + + $self->{value} =~ s/\@PLT// if (!$elf); + $self->{value} =~ s/([_a-z][_a-z0-9]*)/$globals{$1} or $1/gei; + $self->{value} =~ s/\.L/$decor/g; + } + $ret; + } + sub out { + my $self = shift; + if ($nasm && opcode->mnemonic()=~m/^j/) { + "NEAR ".$self->{value}; + } else { + $self->{value}; + } + } +} +{ package directive; # pick up directives, which start with . + sub re { + my $self = shift; # single instance is enough... + local *line = shift; + undef $ret; + my $dir; + my %opcode = # lea 2f-1f(%rip),%dst; 1: nop; 2: + ( "%rax"=>0x01058d48, "%rcx"=>0x010d8d48, + "%rdx"=>0x01158d48, "%rbx"=>0x011d8d48, + "%rsp"=>0x01258d48, "%rbp"=>0x012d8d48, + "%rsi"=>0x01358d48, "%rdi"=>0x013d8d48, + "%r8" =>0x01058d4c, "%r9" =>0x010d8d4c, + "%r10"=>0x01158d4c, "%r11"=>0x011d8d4c, + "%r12"=>0x01258d4c, "%r13"=>0x012d8d4c, + "%r14"=>0x01358d4c, "%r15"=>0x013d8d4c ); + + if ($line =~ /^\s*(\.\w+)/) { + $dir = $1; + $ret = $self; + undef $self->{value}; + $line = substr($line,@+[0]); $line =~ s/^\s+//; + + SWITCH: for ($dir) { + /\.picmeup/ && do { if ($line =~ /(%r[\w]+)/i) { + $dir="\t.long"; + $line=sprintf "0x%x,0x90000000",$opcode{$1}; + } + last; + }; + /\.global|\.globl|\.extern/ + && do { $globals{$line} = $prefix . $line; + $line = $globals{$line} if ($prefix); + last; + }; + /\.type/ && do { ($sym,$type,$narg) = split(',',$line); + if ($type eq "\@function") { + undef $current_function; + $current_function->{name} = $sym; + $current_function->{abi} = "svr4"; + $current_function->{narg} = $narg; + $current_function->{scope} = defined($globals{$sym})?"PUBLIC":"PRIVATE"; + } elsif ($type eq "\@abi-omnipotent") { + undef $current_function; + $current_function->{name} = $sym; + $current_function->{scope} = defined($globals{$sym})?"PUBLIC":"PRIVATE"; + } + $line =~ s/\@abi\-omnipotent/\@function/; + $line =~ s/\@function.*/\@function/; + last; + }; + /\.asciz/ && do { if ($line =~ /^"(.*)"$/) { + $dir = ".byte"; + $line = join(",",unpack("C*",$1),0); + } + last; + }; + /\.rva|\.long|\.quad/ + && do { $line =~ s/([_a-z][_a-z0-9]*)/$globals{$1} or $1/gei; + $line =~ s/\.L/$decor/g; + last; + }; + } + + if ($gas) { + $self->{value} = $dir . "\t" . $line; + + if ($dir =~ /\.extern/) { + $self->{value} = ""; # swallow extern + } elsif (!$elf && $dir =~ /\.type/) { + $self->{value} = ""; + $self->{value} = ".def\t" . ($globals{$1} or $1) . ";\t" . + (defined($globals{$1})?".scl 2;":".scl 3;") . + "\t.type 32;\t.endef" + if ($win64 && $line =~ /([^,]+),\@function/); + } elsif (!$elf && $dir =~ /\.size/) { + $self->{value} = ""; + if (defined($current_function)) { + $self->{value} .= "${decor}SEH_end_$current_function->{name}:" + if ($win64 && $current_function->{abi} eq "svr4"); + undef $current_function; + } + } elsif (!$elf && $dir =~ /\.align/) { + $self->{value} = ".p2align\t" . (log($line)/log(2)); + } elsif ($dir eq ".section") { + $current_segment=$line; + if (!$elf && $current_segment eq ".init") { + if ($flavour eq "macosx") { $self->{value} = ".mod_init_func"; } + elsif ($flavour eq "mingw64") { $self->{value} = ".section\t.ctors"; } + } + } elsif ($dir =~ /\.(text|data)/) { + $current_segment=".$1"; + } + $line = ""; + return $self; + } + + # non-gas case or nasm/masm + SWITCH: for ($dir) { + /\.text/ && do { my $v=undef; + if ($nasm) { + $v="section .text code align=64\n"; + } else { + $v="$current_segment\tENDS\n" if ($current_segment); + $current_segment = ".text\$"; + $v.="$current_segment\tSEGMENT "; + $v.=$masm>=$masmref ? "ALIGN(64)" : "PAGE"; + $v.=" 'CODE'"; + } + $self->{value} = $v; + last; + }; + /\.data/ && do { my $v=undef; + if ($nasm) { + $v="section .data data align=8\n"; + } else { + $v="$current_segment\tENDS\n" if ($current_segment); + $current_segment = "_DATA"; + $v.="$current_segment\tSEGMENT"; + } + $self->{value} = $v; + last; + }; + /\.section/ && do { my $v=undef; + $line =~ s/([^,]*).*/$1/; + $line = ".CRT\$XCU" if ($line eq ".init"); + if ($nasm) { + $v="section $line"; + if ($line=~/\.([px])data/) { + $v.=" rdata align="; + $v.=$1 eq "p"? 4 : 8; + } elsif ($line=~/\.CRT\$/i) { + $v.=" rdata align=8"; + } + } else { + $v="$current_segment\tENDS\n" if ($current_segment); + $v.="$line\tSEGMENT"; + if ($line=~/\.([px])data/) { + $v.=" READONLY"; + $v.=" ALIGN(".($1 eq "p" ? 4 : 8).")" if ($masm>=$masmref); + } elsif ($line=~/\.CRT\$/i) { + $v.=" READONLY DWORD"; + } + } + $current_segment = $line; + $self->{value} = $v; + last; + }; + /\.extern/ && do { $self->{value} = "EXTERN\t".$line; + $self->{value} .= ":NEAR" if ($masm); + last; + }; + /\.globl|.global/ + && do { $self->{value} = $masm?"PUBLIC":"global"; + $self->{value} .= "\t".$line; + last; + }; + /\.size/ && do { if (defined($current_function)) { + undef $self->{value}; + if ($current_function->{abi} eq "svr4") { + $self->{value}="${decor}SEH_end_$current_function->{name}:"; + $self->{value}.=":\n" if($masm); + } + $self->{value}.="$current_function->{name}\tENDP" if($masm); + undef $current_function; + } + last; + }; + /\.align/ && do { $self->{value} = "ALIGN\t".$line; last; }; + /\.(value|long|rva|quad)/ + && do { my $sz = substr($1,0,1); + my @arr = split(/,\s*/,$line); + my $last = pop(@arr); + my $conv = sub { my $var=shift; + $var=~s/^(0b[0-1]+)/oct($1)/eig; + $var=~s/^0x([0-9a-f]+)/0$1h/ig if ($masm); + if ($sz eq "D" && ($current_segment=~/.[px]data/ || $dir eq ".rva")) + { $var=~s/([_a-z\$\@][_a-z0-9\$\@]*)/$nasm?"$1 wrt ..imagebase":"imagerel $1"/egi; } + $var; + }; + + $sz =~ tr/bvlrq/BWDDQ/; + $self->{value} = "\tD$sz\t"; + for (@arr) { $self->{value} .= &$conv($_).","; } + $self->{value} .= &$conv($last); + last; + }; + /\.byte/ && do { my @str=split(/,\s*/,$line); + map(s/(0b[0-1]+)/oct($1)/eig,@str); + map(s/0x([0-9a-f]+)/0$1h/ig,@str) if ($masm); + while ($#str>15) { + $self->{value}.="DB\t" + .join(",",@str[0..15])."\n"; + foreach (0..15) { shift @str; } + } + $self->{value}.="DB\t" + .join(",",@str) if (@str); + last; + }; + } + $line = ""; + } + + $ret; + } + sub out { + my $self = shift; + $self->{value}; + } +} + +if ($nasm) { + print <<___; +default rel +___ +} elsif ($masm) { + print <<___; +OPTION DOTNAME +___ +} +while($line=<>) { + + chomp($line); + + $line =~ s|[#!].*$||; # get rid of asm-style comments... + $line =~ s|/\*.*\*/||; # ... and C-style comments... + $line =~ s|^\s+||; # ... and skip white spaces in beginning + + undef $label; + undef $opcode; + undef $sz; + undef @args; + + if ($label=label->re(\$line)) { print $label->out(); } + + if (directive->re(\$line)) { + printf "%s",directive->out(); + } elsif ($opcode=opcode->re(\$line)) { ARGUMENT: while (1) { + my $arg; + + if ($arg=register->re(\$line)) { opcode->size($arg->size()); } + elsif ($arg=const->re(\$line)) { } + elsif ($arg=ea->re(\$line)) { } + elsif ($arg=expr->re(\$line)) { } + else { last ARGUMENT; } + + push @args,$arg; + + last ARGUMENT if ($line !~ /^,/); + + $line =~ s/^,\s*//; + } # ARGUMENT: + + $sz=opcode->size(); + + if ($#args>=0) { + my $insn; + if ($gas) { + $insn = $opcode->out($#args>=1?$args[$#args]->size():$sz); + } else { + $insn = $opcode->out(); + $insn .= $sz if (map($_->out() =~ /x?mm/,@args)); + @args = reverse(@args); + undef $sz if ($nasm && $opcode->mnemonic() eq "lea"); + } + printf "\t%s\t%s",$insn,join(",",map($_->out($sz),@args)); + } else { + printf "\t%s",$opcode->out(); + } + } + + print $line,"\n"; +} + +print "\n$current_segment\tENDS\n" if ($current_segment && $masm); +print "END\n" if ($masm); + +close STDOUT; + +################################################# +# Cross-reference x86_64 ABI "card" +# +# Unix Win64 +# %rax * * +# %rbx - - +# %rcx #4 #1 +# %rdx #3 #2 +# %rsi #2 - +# %rdi #1 - +# %rbp - - +# %rsp - - +# %r8 #5 #3 +# %r9 #6 #4 +# %r10 * * +# %r11 * * +# %r12 - - +# %r13 - - +# %r14 - - +# %r15 - - +# +# (*) volatile register +# (-) preserved by callee +# (#) Nth argument, volatile +# +# In Unix terms top of stack is argument transfer area for arguments +# which could not be accomodated in registers. Or in other words 7th +# [integer] argument resides at 8(%rsp) upon function entry point. +# 128 bytes above %rsp constitute a "red zone" which is not touched +# by signal handlers and can be used as temporal storage without +# allocating a frame. +# +# In Win64 terms N*8 bytes on top of stack is argument transfer area, +# which belongs to/can be overwritten by callee. N is the number of +# arguments passed to callee, *but* not less than 4! This means that +# upon function entry point 5th argument resides at 40(%rsp), as well +# as that 32 bytes from 8(%rsp) can always be used as temporal +# storage [without allocating a frame]. One can actually argue that +# one can assume a "red zone" above stack pointer under Win64 as well. +# Point is that at apparently no occasion Windows kernel would alter +# the area above user stack pointer in true asynchronous manner... +# +# All the above means that if assembler programmer adheres to Unix +# register and stack layout, but disregards the "red zone" existense, +# it's possible to use following prologue and epilogue to "gear" from +# Unix to Win64 ABI in leaf functions with not more than 6 arguments. +# +# omnipotent_function: +# ifdef WIN64 +# movq %rdi,8(%rsp) +# movq %rsi,16(%rsp) +# movq %rcx,%rdi ; if 1st argument is actually present +# movq %rdx,%rsi ; if 2nd argument is actually ... +# movq %r8,%rdx ; if 3rd argument is ... +# movq %r9,%rcx ; if 4th argument ... +# movq 40(%rsp),%r8 ; if 5th ... +# movq 48(%rsp),%r9 ; if 6th ... +# endif +# ... +# ifdef WIN64 +# movq 8(%rsp),%rdi +# movq 16(%rsp),%rsi +# endif +# ret +# +################################################# +# Win64 SEH, Structured Exception Handling. +# +# Unlike on Unix systems(*) lack of Win64 stack unwinding information +# has undesired side-effect at run-time: if an exception is raised in +# assembler subroutine such as those in question (basically we're +# referring to segmentation violations caused by malformed input +# parameters), the application is briskly terminated without invoking +# any exception handlers, most notably without generating memory dump +# or any user notification whatsoever. This poses a problem. It's +# possible to address it by registering custom language-specific +# handler that would restore processor context to the state at +# subroutine entry point and return "exception is not handled, keep +# unwinding" code. Writing such handler can be a challenge... But it's +# doable, though requires certain coding convention. Consider following +# snippet: +# +# .type function,@function +# function: +# movq %rsp,%rax # copy rsp to volatile register +# pushq %r15 # save non-volatile registers +# pushq %rbx +# pushq %rbp +# movq %rsp,%r11 +# subq %rdi,%r11 # prepare [variable] stack frame +# andq $-64,%r11 +# movq %rax,0(%r11) # check for exceptions +# movq %r11,%rsp # allocate [variable] stack frame +# movq %rax,0(%rsp) # save original rsp value +# magic_point: +# ... +# movq 0(%rsp),%rcx # pull original rsp value +# movq -24(%rcx),%rbp # restore non-volatile registers +# movq -16(%rcx),%rbx +# movq -8(%rcx),%r15 +# movq %rcx,%rsp # restore original rsp +# ret +# .size function,.-function +# +# The key is that up to magic_point copy of original rsp value remains +# in chosen volatile register and no non-volatile register, except for +# rsp, is modified. While past magic_point rsp remains constant till +# the very end of the function. In this case custom language-specific +# exception handler would look like this: +# +# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame, +# CONTEXT *context,DISPATCHER_CONTEXT *disp) +# { ULONG64 *rsp = (ULONG64 *)context->Rax; +# if (context->Rip >= magic_point) +# { rsp = ((ULONG64 **)context->Rsp)[0]; +# context->Rbp = rsp[-3]; +# context->Rbx = rsp[-2]; +# context->R15 = rsp[-1]; +# } +# context->Rsp = (ULONG64)rsp; +# context->Rdi = rsp[1]; +# context->Rsi = rsp[2]; +# +# memcpy (disp->ContextRecord,context,sizeof(CONTEXT)); +# RtlVirtualUnwind(UNW_FLAG_NHANDLER,disp->ImageBase, +# dips->ControlPc,disp->FunctionEntry,disp->ContextRecord, +# &disp->HandlerData,&disp->EstablisherFrame,NULL); +# return ExceptionContinueSearch; +# } +# +# It's appropriate to implement this handler in assembler, directly in +# function's module. In order to do that one has to know members' +# offsets in CONTEXT and DISPATCHER_CONTEXT structures and some constant +# values. Here they are: +# +# CONTEXT.Rax 120 +# CONTEXT.Rcx 128 +# CONTEXT.Rdx 136 +# CONTEXT.Rbx 144 +# CONTEXT.Rsp 152 +# CONTEXT.Rbp 160 +# CONTEXT.Rsi 168 +# CONTEXT.Rdi 176 +# CONTEXT.R8 184 +# CONTEXT.R9 192 +# CONTEXT.R10 200 +# CONTEXT.R11 208 +# CONTEXT.R12 216 +# CONTEXT.R13 224 +# CONTEXT.R14 232 +# CONTEXT.R15 240 +# CONTEXT.Rip 248 +# CONTEXT.Xmm6 512 +# sizeof(CONTEXT) 1232 +# DISPATCHER_CONTEXT.ControlPc 0 +# DISPATCHER_CONTEXT.ImageBase 8 +# DISPATCHER_CONTEXT.FunctionEntry 16 +# DISPATCHER_CONTEXT.EstablisherFrame 24 +# DISPATCHER_CONTEXT.TargetIp 32 +# DISPATCHER_CONTEXT.ContextRecord 40 +# DISPATCHER_CONTEXT.LanguageHandler 48 +# DISPATCHER_CONTEXT.HandlerData 56 +# UNW_FLAG_NHANDLER 0 +# ExceptionContinueSearch 1 +# +# In order to tie the handler to the function one has to compose +# couple of structures: one for .xdata segment and one for .pdata. +# +# UNWIND_INFO structure for .xdata segment would be +# +# function_unwind_info: +# .byte 9,0,0,0 +# .rva handler +# +# This structure designates exception handler for a function with +# zero-length prologue, no stack frame or frame register. +# +# To facilitate composing of .pdata structures, auto-generated "gear" +# prologue copies rsp value to rax and denotes next instruction with +# .LSEH_begin_{function_name} label. This essentially defines the SEH +# styling rule mentioned in the beginning. Position of this label is +# chosen in such manner that possible exceptions raised in the "gear" +# prologue would be accounted to caller and unwound from latter's frame. +# End of function is marked with respective .LSEH_end_{function_name} +# label. To summarize, .pdata segment would contain +# +# .rva .LSEH_begin_function +# .rva .LSEH_end_function +# .rva function_unwind_info +# +# Reference to functon_unwind_info from .xdata segment is the anchor. +# In case you wonder why references are 32-bit .rvas and not 64-bit +# .quads. References put into these two segments are required to be +# *relative* to the base address of the current binary module, a.k.a. +# image base. No Win64 module, be it .exe or .dll, can be larger than +# 2GB and thus such relative references can be and are accommodated in +# 32 bits. +# +# Having reviewed the example function code, one can argue that "movq +# %rsp,%rax" above is redundant. It is not! Keep in mind that on Unix +# rax would contain an undefined value. If this "offends" you, use +# another register and refrain from modifying rax till magic_point is +# reached, i.e. as if it was a non-volatile register. If more registers +# are required prior [variable] frame setup is completed, note that +# nobody says that you can have only one "magic point." You can +# "liberate" non-volatile registers by denoting last stack off-load +# instruction and reflecting it in finer grade unwind logic in handler. +# After all, isn't it why it's called *language-specific* handler... +# +# Attentive reader can notice that exceptions would be mishandled in +# auto-generated "gear" epilogue. Well, exception effectively can't +# occur there, because if memory area used by it was subject to +# segmentation violation, then it would be raised upon call to the +# function (and as already mentioned be accounted to caller, which is +# not a problem). If you're still not comfortable, then define tail +# "magic point" just prior ret instruction and have handler treat it... +# +# (*) Note that we're talking about run-time, not debug-time. Lack of +# unwind information makes debugging hard on both Windows and +# Unix. "Unlike" referes to the fact that on Unix signal handler +# will always be invoked, core dumped and appropriate exit code +# returned to parent (for user notification). diff --git a/openssl/crypto/perlasm/x86asm.pl b/openssl/crypto/perlasm/x86asm.pl new file mode 100644 index 00000000..28080caa --- /dev/null +++ b/openssl/crypto/perlasm/x86asm.pl @@ -0,0 +1,207 @@ +#!/usr/bin/env perl + +# require 'x86asm.pl'; +# &asm_init(<flavor>,"des-586.pl"[,$i386only]); +# &function_begin("foo"); +# ... +# &function_end("foo"); +# &asm_finish + +$out=(); +$i386=0; + +# AUTOLOAD is this context has quite unpleasant side effect, namely +# that typos in function calls effectively go to assembler output, +# but on the pros side we don't have to implement one subroutine per +# each opcode... +sub ::AUTOLOAD +{ my $opcode = $AUTOLOAD; + + die "more than 4 arguments passed to $opcode" if ($#_>3); + + $opcode =~ s/.*:://; + if ($opcode =~ /^push/) { $stack+=4; } + elsif ($opcode =~ /^pop/) { $stack-=4; } + + &generic($opcode,@_) or die "undefined subroutine \&$AUTOLOAD"; +} + +sub ::emit +{ my $opcode=shift; + + if ($#_==-1) { push(@out,"\t$opcode\n"); } + else { push(@out,"\t$opcode\t".join(',',@_)."\n"); } +} + +sub ::LB +{ $_[0] =~ m/^e?([a-d])x$/o or die "$_[0] does not have a 'low byte'"; + $1."l"; +} +sub ::HB +{ $_[0] =~ m/^e?([a-d])x$/o or die "$_[0] does not have a 'high byte'"; + $1."h"; +} +sub ::stack_push{ my $num=$_[0]*4; $stack+=$num; &sub("esp",$num); } +sub ::stack_pop { my $num=$_[0]*4; $stack-=$num; &add("esp",$num); } +sub ::blindpop { &pop($_[0]); $stack+=4; } +sub ::wparam { &DWP($stack+4*$_[0],"esp"); } +sub ::swtmp { &DWP(4*$_[0],"esp"); } + +sub ::bswap +{ if ($i386) # emulate bswap for i386 + { &comment("bswap @_"); + &xchg(&HB(@_),&LB(@_)); + &ror (@_,16); + &xchg(&HB(@_),&LB(@_)); + } + else + { &generic("bswap",@_); } +} +# These are made-up opcodes introduced over the years essentially +# by ignorance, just alias them to real ones... +sub ::movb { &mov(@_); } +sub ::xorb { &xor(@_); } +sub ::rotl { &rol(@_); } +sub ::rotr { &ror(@_); } +sub ::exch { &xchg(@_); } +sub ::halt { &hlt; } +sub ::movz { &movzx(@_); } +sub ::pushf { &pushfd; } +sub ::popf { &popfd; } + +# 3 argument instructions +sub ::movq +{ my($p1,$p2,$optimize)=@_; + + if ($optimize && $p1=~/^mm[0-7]$/ && $p2=~/^mm[0-7]$/) + # movq between mmx registers can sink Intel CPUs + { &::pshufw($p1,$p2,0xe4); } + else + { &::generic("movq",@_); } +} + +# label management +$lbdecor="L"; # local label decoration, set by package +$label="000"; + +sub ::islabel # see is argument is a known label +{ my $i; + foreach $i (values %label) { return $i if ($i eq $_[0]); } + $label{$_[0]}; # can be undef +} + +sub ::label # instantiate a function-scope label +{ if (!defined($label{$_[0]})) + { $label{$_[0]}="${lbdecor}${label}${_[0]}"; $label++; } + $label{$_[0]}; +} + +sub ::LABEL # instantiate a file-scope label +{ $label{$_[0]}=$_[1] if (!defined($label{$_[0]})); + $label{$_[0]}; +} + +sub ::static_label { &::LABEL($_[0],$lbdecor.$_[0]); } + +sub ::set_label_B { push(@out,"@_:\n"); } +sub ::set_label +{ my $label=&::label($_[0]); + &::align($_[1]) if ($_[1]>1); + &::set_label_B($label); + $label; +} + +sub ::wipe_labels # wipes function-scope labels +{ foreach $i (keys %label) + { delete $label{$i} if ($label{$i} =~ /^\Q${lbdecor}\E[0-9]{3}/); } +} + +# subroutine management +sub ::function_begin +{ &function_begin_B(@_); + $stack=4; + &push("ebp"); + &push("ebx"); + &push("esi"); + &push("edi"); +} + +sub ::function_end +{ &pop("edi"); + &pop("esi"); + &pop("ebx"); + &pop("ebp"); + &ret(); + &function_end_B(@_); + $stack=0; + &wipe_labels(); +} + +sub ::function_end_A +{ &pop("edi"); + &pop("esi"); + &pop("ebx"); + &pop("ebp"); + &ret(); + $stack+=16; # readjust esp as if we didn't pop anything +} + +sub ::asciz +{ my @str=unpack("C*",shift); + push @str,0; + while ($#str>15) { + &data_byte(@str[0..15]); + foreach (0..15) { shift @str; } + } + &data_byte(@str) if (@str); +} + +sub ::asm_finish +{ &file_end(); + print @out; +} + +sub ::asm_init +{ my ($type,$fn,$cpu)=@_; + + $filename=$fn; + $i386=$cpu; + + $elf=$cpp=$coff=$aout=$macosx=$win32=$netware=$mwerks=0; + if (($type eq "elf")) + { $elf=1; require "x86gas.pl"; } + elsif (($type eq "a\.out")) + { $aout=1; require "x86gas.pl"; } + elsif (($type eq "coff" or $type eq "gaswin")) + { $coff=1; require "x86gas.pl"; } + elsif (($type eq "win32n")) + { $win32=1; require "x86nasm.pl"; } + elsif (($type eq "nw-nasm")) + { $netware=1; require "x86nasm.pl"; } + #elsif (($type eq "nw-mwasm")) + #{ $netware=1; $mwerks=1; require "x86nasm.pl"; } + elsif (($type eq "win32")) + { $win32=1; require "x86masm.pl"; } + elsif (($type eq "macosx")) + { $aout=1; $macosx=1; require "x86gas.pl"; } + else + { print STDERR <<"EOF"; +Pick one target type from + elf - Linux, FreeBSD, Solaris x86, etc. + a.out - DJGPP, elder OpenBSD, etc. + coff - GAS/COFF such as Win32 targets + win32n - Windows 95/Windows NT NASM format + nw-nasm - NetWare NASM format + macosx - Mac OS X +EOF + exit(1); + } + + $pic=0; + for (@ARGV) { $pic=1 if (/\-[fK]PIC/i); } + + $filename =~ s/\.pl$//; + &file($filename); +} + +1; diff --git a/openssl/crypto/perlasm/x86gas.pl b/openssl/crypto/perlasm/x86gas.pl new file mode 100644 index 00000000..6eab727f --- /dev/null +++ b/openssl/crypto/perlasm/x86gas.pl @@ -0,0 +1,247 @@ +#!/usr/bin/env perl + +package x86gas; + +*out=\@::out; + +$::lbdecor=$::aout?"L":".L"; # local label decoration +$nmdecor=($::aout or $::coff)?"_":""; # external name decoration + +$initseg=""; + +$align=16; +$align=log($align)/log(2) if ($::aout); +$com_start="#" if ($::aout or $::coff); + +sub opsize() +{ my $reg=shift; + if ($reg =~ m/^%e/o) { "l"; } + elsif ($reg =~ m/^%[a-d][hl]$/o) { "b"; } + elsif ($reg =~ m/^%[xm]/o) { undef; } + else { "w"; } +} + +# swap arguments; +# expand opcode with size suffix; +# prefix numeric constants with $; +sub ::generic +{ my($opcode,@arg)=@_; + my($suffix,$dst,$src); + + @arg=reverse(@arg); + + for (@arg) + { s/^(\*?)(e?[a-dsixphl]{2})$/$1%$2/o; # gp registers + s/^([xy]?mm[0-7])$/%$1/o; # xmm/mmx registers + s/^(\-?[0-9]+)$/\$$1/o; # constants + s/^(\-?0x[0-9a-f]+)$/\$$1/o; # constants + } + + $dst = $arg[$#arg] if ($#arg>=0); + $src = $arg[$#arg-1] if ($#arg>=1); + if ($dst =~ m/^%/o) { $suffix=&opsize($dst); } + elsif ($src =~ m/^%/o) { $suffix=&opsize($src); } + else { $suffix="l"; } + undef $suffix if ($dst =~ m/^%[xm]/o || $src =~ m/^%[xm]/o); + + if ($#_==0) { &::emit($opcode); } + elsif ($opcode =~ m/^j/o && $#_==1) { &::emit($opcode,@arg); } + elsif ($opcode eq "call" && $#_==1) { &::emit($opcode,@arg); } + elsif ($opcode =~ m/^set/&& $#_==1) { &::emit($opcode,@arg); } + else { &::emit($opcode.$suffix,@arg);} + + 1; +} +# +# opcodes not covered by ::generic above, mostly inconsistent namings... +# +sub ::movzx { &::movzb(@_); } +sub ::pushfd { &::pushfl; } +sub ::popfd { &::popfl; } +sub ::cpuid { &::emit(".byte\t0x0f,0xa2"); } +sub ::rdtsc { &::emit(".byte\t0x0f,0x31"); } + +sub ::call { &::emit("call",(&::islabel($_[0]) or "$nmdecor$_[0]")); } +sub ::call_ptr { &::generic("call","*$_[0]"); } +sub ::jmp_ptr { &::generic("jmp","*$_[0]"); } + +*::bswap = sub { &::emit("bswap","%$_[0]"); } if (!$::i386); + +sub ::DWP +{ my($addr,$reg1,$reg2,$idx)=@_; + my $ret=""; + + $addr =~ s/^\s+//; + # prepend global references with optional underscore + $addr =~ s/^([^\+\-0-9][^\+\-]*)/&::islabel($1) or "$nmdecor$1"/ige; + + $reg1 = "%$reg1" if ($reg1); + $reg2 = "%$reg2" if ($reg2); + + $ret .= $addr if (($addr ne "") && ($addr ne 0)); + + if ($reg2) + { $idx!= 0 or $idx=1; + $ret .= "($reg1,$reg2,$idx)"; + } + elsif ($reg1) + { $ret .= "($reg1)"; } + + $ret; +} +sub ::QWP { &::DWP(@_); } +sub ::BP { &::DWP(@_); } +sub ::BC { @_; } +sub ::DWC { @_; } + +sub ::file +{ push(@out,".file\t\"$_[0].s\"\n.text\n"); } + +sub ::function_begin_B +{ my $func=shift; + my $global=($func !~ /^_/); + my $begin="${::lbdecor}_${func}_begin"; + + &::LABEL($func,$global?"$begin":"$nmdecor$func"); + $func=$nmdecor.$func; + + push(@out,".globl\t$func\n") if ($global); + if ($::coff) + { push(@out,".def\t$func;\t.scl\t".(3-$global).";\t.type\t32;\t.endef\n"); } + elsif (($::aout and !$::pic) or $::macosx) + { } + else + { push(@out,".type $func,\@function\n"); } + push(@out,".align\t$align\n"); + push(@out,"$func:\n"); + push(@out,"$begin:\n") if ($global); + $::stack=4; +} + +sub ::function_end_B +{ my $func=shift; + push(@out,".size\t$nmdecor$func,.-".&::LABEL($func)."\n") if ($::elf); + $::stack=0; + &::wipe_labels(); +} + +sub ::comment + { + if (!defined($com_start) or $::elf) + { # Regarding $::elf above... + # GNU and SVR4 as'es use different comment delimiters, + push(@out,"\n"); # so we just skip ELF comments... + return; + } + foreach (@_) + { + if (/^\s*$/) + { push(@out,"\n"); } + else + { push(@out,"\t$com_start $_ $com_end\n"); } + } + } + +sub ::external_label +{ foreach(@_) { &::LABEL($_,$nmdecor.$_); } } + +sub ::public_label +{ push(@out,".globl\t".&::LABEL($_[0],$nmdecor.$_[0])."\n"); } + +sub ::file_end +{ if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { + my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,4"; + if ($::elf) { push (@out,"$tmp,4\n"); } + else { push (@out,"$tmp\n"); } + } + if ($::macosx) + { if (%non_lazy_ptr) + { push(@out,".section __IMPORT,__pointers,non_lazy_symbol_pointers\n"); + foreach $i (keys %non_lazy_ptr) + { push(@out,"$non_lazy_ptr{$i}:\n.indirect_symbol\t$i\n.long\t0\n"); } + } + } + push(@out,$initseg) if ($initseg); +} + +sub ::data_byte { push(@out,".byte\t".join(',',@_)."\n"); } +sub ::data_word { push(@out,".long\t".join(',',@_)."\n"); } + +sub ::align +{ my $val=$_[0],$p2,$i; + if ($::aout) + { for ($p2=0;$val!=0;$val>>=1) { $p2++; } + $val=$p2-1; + $val.=",0x90"; + } + push(@out,".align\t$val\n"); +} + +sub ::picmeup +{ my($dst,$sym,$base,$reflabel)=@_; + + if ($::pic && ($::elf || $::aout)) + { if (!defined($base)) + { &::call(&::label("PIC_me_up")); + &::set_label("PIC_me_up"); + &::blindpop($dst); + $base=$dst; + $reflabel=&::label("PIC_me_up"); + } + if ($::macosx) + { my $indirect=&::static_label("$nmdecor$sym\$non_lazy_ptr"); + &::mov($dst,&::DWP("$indirect-$reflabel",$base)); + $non_lazy_ptr{"$nmdecor$sym"}=$indirect; + } + else + { &::lea($dst,&::DWP("_GLOBAL_OFFSET_TABLE_+[.-$reflabel]", + $base)); + &::mov($dst,&::DWP("$sym\@GOT",$dst)); + } + } + else + { &::lea($dst,&::DWP($sym)); } +} + +sub ::initseg +{ my $f=$nmdecor.shift; + + if ($::elf) + { $initseg.=<<___; +.section .init + call $f + jmp .Linitalign +.align $align +.Linitalign: +___ + } + elsif ($::coff) + { $initseg.=<<___; # applies to both Cygwin and Mingw +.section .ctors +.long $f +___ + } + elsif ($::macosx) + { $initseg.=<<___; +.mod_init_func +.align 2 +.long $f +___ + } + elsif ($::aout) + { my $ctor="${nmdecor}_GLOBAL_\$I\$$f"; + $initseg.=".text\n"; + $initseg.=".type $ctor,\@function\n" if ($::pic); + $initseg.=<<___; # OpenBSD way... +.globl $ctor +.align 2 +$ctor: + jmp $f +___ + } +} + +sub ::dataseg +{ push(@out,".data\n"); } + +1; diff --git a/openssl/crypto/perlasm/x86masm.pl b/openssl/crypto/perlasm/x86masm.pl new file mode 100644 index 00000000..3d50e4a7 --- /dev/null +++ b/openssl/crypto/perlasm/x86masm.pl @@ -0,0 +1,184 @@ +#!/usr/bin/env perl + +package x86masm; + +*out=\@::out; + +$::lbdecor="\$L"; # local label decoration +$nmdecor="_"; # external name decoration + +$initseg=""; +$segment=""; + +sub ::generic +{ my ($opcode,@arg)=@_; + + # fix hexadecimal constants + for (@arg) { s/0x([0-9a-f]+)/0$1h/oi; } + + if ($opcode !~ /movq/) + { # fix xmm references + $arg[0] =~ s/\b[A-Z]+WORD\s+PTR/XMMWORD PTR/i if ($arg[1]=~/\bxmm[0-7]\b/i); + $arg[1] =~ s/\b[A-Z]+WORD\s+PTR/XMMWORD PTR/i if ($arg[0]=~/\bxmm[0-7]\b/i); + } + + &::emit($opcode,@arg); + 1; +} +# +# opcodes not covered by ::generic above, mostly inconsistent namings... +# +sub ::call { &::emit("call",(&::islabel($_[0]) or "$nmdecor$_[0]")); } +sub ::call_ptr { &::emit("call",@_); } +sub ::jmp_ptr { &::emit("jmp",@_); } + +sub get_mem +{ my($size,$addr,$reg1,$reg2,$idx)=@_; + my($post,$ret); + + $ret .= "$size PTR " if ($size ne ""); + + $addr =~ s/^\s+//; + # prepend global references with optional underscore + $addr =~ s/^([^\+\-0-9][^\+\-]*)/&::islabel($1) or "$nmdecor$1"/ige; + # put address arithmetic expression in parenthesis + $addr="($addr)" if ($addr =~ /^.+[\-\+].+$/); + + if (($addr ne "") && ($addr ne 0)) + { if ($addr !~ /^-/) { $ret .= "$addr"; } + else { $post=$addr; } + } + $ret .= "["; + + if ($reg2 ne "") + { $idx!=0 or $idx=1; + $ret .= "$reg2*$idx"; + $ret .= "+$reg1" if ($reg1 ne ""); + } + else + { $ret .= "$reg1"; } + + $ret .= "$post]"; + $ret =~ s/\+\]/]/; # in case $addr was the only argument + $ret =~ s/\[\s*\]//; + + $ret; +} +sub ::BP { &get_mem("BYTE",@_); } +sub ::DWP { &get_mem("DWORD",@_); } +sub ::QWP { &get_mem("QWORD",@_); } +sub ::BC { "@_"; } +sub ::DWC { "@_"; } + +sub ::file +{ my $tmp=<<___; +TITLE $_[0].asm +IF \@Version LT 800 +ECHO MASM version 8.00 or later is strongly recommended. +ENDIF +.486 +.MODEL FLAT +OPTION DOTNAME +IF \@Version LT 800 +.text\$ SEGMENT PAGE 'CODE' +ELSE +.text\$ SEGMENT ALIGN(64) 'CODE' +ENDIF +___ + push(@out,$tmp); + $segment = ".text\$"; +} + +sub ::function_begin_B +{ my $func=shift; + my $global=($func !~ /^_/); + my $begin="${::lbdecor}_${func}_begin"; + + &::LABEL($func,$global?"$begin":"$nmdecor$func"); + $func="ALIGN\t16\n".$nmdecor.$func."\tPROC"; + + if ($global) { $func.=" PUBLIC\n${begin}::\n"; } + else { $func.=" PRIVATE\n"; } + push(@out,$func); + $::stack=4; +} +sub ::function_end_B +{ my $func=shift; + + push(@out,"$nmdecor$func ENDP\n"); + $::stack=0; + &::wipe_labels(); +} + +sub ::file_end +{ my $xmmheader=<<___; +.686 +.XMM +IF \@Version LT 800 +XMMWORD STRUCT 16 +DQ 2 dup (?) +XMMWORD ENDS +ENDIF +___ + if (grep {/\b[x]?mm[0-7]\b/i} @out) { + grep {s/\.[3-7]86/$xmmheader/} @out; + } + + push(@out,"$segment ENDS\n"); + + if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) + { my $comm=<<___; +.bss SEGMENT 'BSS' +COMM ${nmdecor}OPENSSL_ia32cap_P:DWORD +.bss ENDS +___ + # comment out OPENSSL_ia32cap_P declarations + grep {s/(^EXTERN\s+${nmdecor}OPENSSL_ia32cap_P)/\;$1/} @out; + push (@out,$comm); + } + push (@out,$initseg) if ($initseg); + push (@out,"END\n"); +} + +sub ::comment { foreach (@_) { push(@out,"\t; $_\n"); } } + +*::set_label_B = sub +{ my $l=shift; push(@out,$l.($l=~/^\Q${::lbdecor}\E[0-9]{3}/?":\n":"::\n")); }; + +sub ::external_label +{ foreach(@_) + { push(@out, "EXTERN\t".&::LABEL($_,$nmdecor.$_).":NEAR\n"); } +} + +sub ::public_label +{ push(@out,"PUBLIC\t".&::LABEL($_[0],$nmdecor.$_[0])."\n"); } + +sub ::data_byte +{ push(@out,("DB\t").join(',',@_)."\n"); } + +sub ::data_word +{ push(@out,("DD\t").join(',',@_)."\n"); } + +sub ::align +{ push(@out,"ALIGN\t$_[0]\n"); } + +sub ::picmeup +{ my($dst,$sym)=@_; + &::lea($dst,&::DWP($sym)); +} + +sub ::initseg +{ my $f=$nmdecor.shift; + + $initseg.=<<___; +.CRT\$XCU SEGMENT DWORD PUBLIC 'DATA' +EXTERN $f:NEAR +DD $f +.CRT\$XCU ENDS +___ +} + +sub ::dataseg +{ push(@out,"$segment\tENDS\n_DATA\tSEGMENT\n"); $segment="_DATA"; } + +1; diff --git a/openssl/crypto/perlasm/x86nasm.pl b/openssl/crypto/perlasm/x86nasm.pl new file mode 100644 index 00000000..ce2bed9b --- /dev/null +++ b/openssl/crypto/perlasm/x86nasm.pl @@ -0,0 +1,166 @@ +#!/usr/bin/env perl + +package x86nasm; + +*out=\@::out; + +$::lbdecor="L\$"; # local label decoration +$nmdecor=$::netware?"":"_"; # external name decoration +$drdecor=$::mwerks?".":""; # directive decoration + +$initseg=""; + +sub ::generic +{ my $opcode=shift; + my $tmp; + + if (!$::mwerks) + { if ($opcode =~ m/^j/o && $#_==0) # optimize jumps + { $_[0] = "NEAR $_[0]"; } + elsif ($opcode eq "lea" && $#_==1) # wipe storage qualifier from lea + { $_[1] =~ s/^[^\[]*\[/\[/o; } + } + &::emit($opcode,@_); + 1; +} +# +# opcodes not covered by ::generic above, mostly inconsistent namings... +# +sub ::call { &::emit("call",(&::islabel($_[0]) or "$nmdecor$_[0]")); } +sub ::call_ptr { &::emit("call",@_); } +sub ::jmp_ptr { &::emit("jmp",@_); } + +sub get_mem +{ my($size,$addr,$reg1,$reg2,$idx)=@_; + my($post,$ret); + + if ($size ne "") + { $ret .= "$size"; + $ret .= " PTR" if ($::mwerks); + $ret .= " "; + } + $ret .= "["; + + $addr =~ s/^\s+//; + # prepend global references with optional underscore + $addr =~ s/^([^\+\-0-9][^\+\-]*)/::islabel($1) or "$nmdecor$1"/ige; + # put address arithmetic expression in parenthesis + $addr="($addr)" if ($addr =~ /^.+[\-\+].+$/); + + if (($addr ne "") && ($addr ne 0)) + { if ($addr !~ /^-/) { $ret .= "$addr+"; } + else { $post=$addr; } + } + + if ($reg2 ne "") + { $idx!=0 or $idx=1; + $ret .= "$reg2*$idx"; + $ret .= "+$reg1" if ($reg1 ne ""); + } + else + { $ret .= "$reg1"; } + + $ret .= "$post]"; + $ret =~ s/\+\]/]/; # in case $addr was the only argument + + $ret; +} +sub ::BP { &get_mem("BYTE",@_); } +sub ::DWP { &get_mem("DWORD",@_); } +sub ::QWP { &get_mem("",@_); } +sub ::BC { (($::mwerks)?"":"BYTE ")."@_"; } +sub ::DWC { (($::mwerks)?"":"DWORD ")."@_"; } + +sub ::file +{ if ($::mwerks) { push(@out,".section\t.text,64\n"); } + else + { my $tmp=<<___; +%ifidn __OUTPUT_FORMAT__,obj +section code use32 class=code align=64 +%elifidn __OUTPUT_FORMAT__,win32 +\$\@feat.00 equ 1 +section .text code align=64 +%else +section .text code +%endif +___ + push(@out,$tmp); + } +} + +sub ::function_begin_B +{ my $func=shift; + my $global=($func !~ /^_/); + my $begin="${::lbdecor}_${func}_begin"; + + $begin =~ s/^\@/./ if ($::mwerks); # the torture never stops + + &::LABEL($func,$global?"$begin":"$nmdecor$func"); + $func=$nmdecor.$func; + + push(@out,"${drdecor}global $func\n") if ($global); + push(@out,"${drdecor}align 16\n"); + push(@out,"$func:\n"); + push(@out,"$begin:\n") if ($global); + $::stack=4; +} + +sub ::function_end_B +{ $::stack=0; + &::wipe_labels(); +} + +sub ::file_end +{ if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) + { my $comm=<<___; +${drdecor}segment .bss +${drdecor}common ${nmdecor}OPENSSL_ia32cap_P 4 +___ + # comment out OPENSSL_ia32cap_P declarations + grep {s/(^extern\s+${nmdecor}OPENSSL_ia32cap_P)/\;$1/} @out; + push (@out,$comm) + } + push (@out,$initseg) if ($initseg); +} + +sub ::comment { foreach (@_) { push(@out,"\t; $_\n"); } } + +sub ::external_label +{ foreach(@_) + { push(@out,"${drdecor}extern\t".&::LABEL($_,$nmdecor.$_)."\n"); } +} + +sub ::public_label +{ push(@out,"${drdecor}global\t".&::LABEL($_[0],$nmdecor.$_[0])."\n"); } + +sub ::data_byte +{ push(@out,(($::mwerks)?".byte\t":"db\t").join(',',@_)."\n"); } + +sub ::data_word +{ push(@out,(($::mwerks)?".long\t":"dd\t").join(',',@_)."\n"); } + +sub ::align +{ push(@out,"${drdecor}align\t$_[0]\n"); } + +sub ::picmeup +{ my($dst,$sym)=@_; + &::lea($dst,&::DWP($sym)); +} + +sub ::initseg +{ my $f=$nmdecor.shift; + if ($::win32) + { $initseg=<<___; +segment .CRT\$XCU data align=4 +extern $f +dd $f +___ + } +} + +sub ::dataseg +{ if ($mwerks) { push(@out,".section\t.data,4\n"); } + else { push(@out,"section\t.data align=4\n"); } +} + +1; |