diff options
Diffstat (limited to 'main/openssl/crypto/rc4/asm/rc4-s390x.pl')
-rw-r--r-- | main/openssl/crypto/rc4/asm/rc4-s390x.pl | 205 |
1 files changed, 205 insertions, 0 deletions
diff --git a/main/openssl/crypto/rc4/asm/rc4-s390x.pl b/main/openssl/crypto/rc4/asm/rc4-s390x.pl new file mode 100644 index 00000000..96681fa0 --- /dev/null +++ b/main/openssl/crypto/rc4/asm/rc4-s390x.pl @@ -0,0 +1,205 @@ +#!/usr/bin/env perl +# +# ==================================================================== +# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL +# project. The module is, however, dual licensed under OpenSSL and +# CRYPTOGAMS licenses depending on where you obtain it. For further +# details see http://www.openssl.org/~appro/cryptogams/. +# ==================================================================== +# +# February 2009 +# +# Performance is 2x of gcc 3.4.6 on z10. Coding "secret" is to +# "cluster" Address Generation Interlocks, so that one pipeline stall +# resolves several dependencies. + +$rp="%r14"; +$sp="%r15"; +$code=<<___; +.text + +___ + +# void RC4(RC4_KEY *key,size_t len,const void *inp,void *out) +{ +$acc="%r0"; +$cnt="%r1"; +$key="%r2"; +$len="%r3"; +$inp="%r4"; +$out="%r5"; + +@XX=("%r6","%r7"); +@TX=("%r8","%r9"); +$YY="%r10"; +$TY="%r11"; + +$code.=<<___; +.globl RC4 +.type RC4,\@function +.align 64 +RC4: + stmg %r6,%r11,48($sp) + llgc $XX[0],0($key) + llgc $YY,1($key) + la $XX[0],1($XX[0]) + nill $XX[0],0xff + srlg $cnt,$len,3 + ltgr $cnt,$cnt + llgc $TX[0],2($XX[0],$key) + jz .Lshort + j .Loop8 + +.align 64 +.Loop8: +___ +for ($i=0;$i<8;$i++) { +$code.=<<___; + la $YY,0($YY,$TX[0]) # $i + nill $YY,255 + la $XX[1],1($XX[0]) + nill $XX[1],255 +___ +$code.=<<___ if ($i==1); + llgc $acc,2($TY,$key) +___ +$code.=<<___ if ($i>1); + sllg $acc,$acc,8 + ic $acc,2($TY,$key) +___ +$code.=<<___; + llgc $TY,2($YY,$key) + stc $TX[0],2($YY,$key) + llgc $TX[1],2($XX[1],$key) + stc $TY,2($XX[0],$key) + cr $XX[1],$YY + jne .Lcmov$i + la $TX[1],0($TX[0]) +.Lcmov$i: + la $TY,0($TY,$TX[0]) + nill $TY,255 +___ +push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers +} + +$code.=<<___; + lg $TX[1],0($inp) + sllg $acc,$acc,8 + la $inp,8($inp) + ic $acc,2($TY,$key) + xgr $acc,$TX[1] + stg $acc,0($out) + la $out,8($out) + brct $cnt,.Loop8 + +.Lshort: + lghi $acc,7 + ngr $len,$acc + jz .Lexit + j .Loop1 + +.align 16 +.Loop1: + la $YY,0($YY,$TX[0]) + nill $YY,255 + llgc $TY,2($YY,$key) + stc $TX[0],2($YY,$key) + stc $TY,2($XX[0],$key) + ar $TY,$TX[0] + ahi $XX[0],1 + nill $TY,255 + nill $XX[0],255 + llgc $acc,0($inp) + la $inp,1($inp) + llgc $TY,2($TY,$key) + llgc $TX[0],2($XX[0],$key) + xr $acc,$TY + stc $acc,0($out) + la $out,1($out) + brct $len,.Loop1 + +.Lexit: + ahi $XX[0],-1 + stc $XX[0],0($key) + stc $YY,1($key) + lmg %r6,%r11,48($sp) + br $rp +.size RC4,.-RC4 +.string "RC4 for s390x, CRYPTOGAMS by <appro\@openssl.org>" + +___ +} + +# void RC4_set_key(RC4_KEY *key,unsigned int len,const void *inp) +{ +$cnt="%r0"; +$idx="%r1"; +$key="%r2"; +$len="%r3"; +$inp="%r4"; +$acc="%r5"; +$dat="%r6"; +$ikey="%r7"; +$iinp="%r8"; + +$code.=<<___; +.globl RC4_set_key +.type RC4_set_key,\@function +.align 64 +RC4_set_key: + stmg %r6,%r8,48($sp) + lhi $cnt,256 + la $idx,0(%r0) + sth $idx,0($key) +.align 4 +.L1stloop: + stc $idx,2($idx,$key) + la $idx,1($idx) + brct $cnt,.L1stloop + + lghi $ikey,-256 + lr $cnt,$len + la $iinp,0(%r0) + la $idx,0(%r0) +.align 16 +.L2ndloop: + llgc $acc,2+256($ikey,$key) + llgc $dat,0($iinp,$inp) + la $idx,0($idx,$acc) + la $ikey,1($ikey) + la $idx,0($idx,$dat) + nill $idx,255 + la $iinp,1($iinp) + tml $ikey,255 + llgc $dat,2($idx,$key) + stc $dat,2+256-1($ikey,$key) + stc $acc,2($idx,$key) + jz .Ldone + brct $cnt,.L2ndloop + lr $cnt,$len + la $iinp,0(%r0) + j .L2ndloop +.Ldone: + lmg %r6,%r8,48($sp) + br $rp +.size RC4_set_key,.-RC4_set_key + +___ +} + +# const char *RC4_options() +$code.=<<___; +.globl RC4_options +.type RC4_options,\@function +.align 16 +RC4_options: + larl %r2,.Loptions + br %r14 +.size RC4_options,.-RC4_options +.section .rodata +.Loptions: +.align 8 +.string "rc4(8x,char)" +___ + +print $code; |