summaryrefslogtreecommitdiff
path: root/main/openssl/crypto/sha/asm/sha1-s390x.pl
diff options
context:
space:
mode:
authorArne Schwabe <arne@rfc2549.org>2015-04-15 00:17:26 +0200
committerArne Schwabe <arne@rfc2549.org>2015-04-15 00:20:23 +0200
commitc3ae4aaac9f0b168aed063d3e86c5196608eaba1 (patch)
tree1a18e7d8751d4dd3682d82d12c8441b335112984 /main/openssl/crypto/sha/asm/sha1-s390x.pl
parent5e42114d22faefe7c272b1b498fdf5640da494c7 (diff)
Move more to git, add submodules, fix build script, change hgignore to gitignore
Diffstat (limited to 'main/openssl/crypto/sha/asm/sha1-s390x.pl')
m---------main/openssl0
-rw-r--r--main/openssl/crypto/sha/asm/sha1-s390x.pl246
2 files changed, 0 insertions, 246 deletions
diff --git a/main/openssl b/main/openssl
new file mode 160000
+Subproject 4d377a9ce111930d8a8f06dc0e94a892a7f6c51
diff --git a/main/openssl/crypto/sha/asm/sha1-s390x.pl b/main/openssl/crypto/sha/asm/sha1-s390x.pl
deleted file mode 100644
index 9193dda4..00000000
--- a/main/openssl/crypto/sha/asm/sha1-s390x.pl
+++ /dev/null
@@ -1,246 +0,0 @@
-#!/usr/bin/env perl
-
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-# SHA1 block procedure for s390x.
-
-# April 2007.
-#
-# Performance is >30% better than gcc 3.3 generated code. But the real
-# twist is that SHA1 hardware support is detected and utilized. In
-# which case performance can reach further >4.5x for larger chunks.
-
-# January 2009.
-#
-# Optimize Xupdate for amount of memory references and reschedule
-# instructions to favour dual-issue z10 pipeline. On z10 hardware is
-# "only" ~2.3x faster than software.
-
-# November 2010.
-#
-# Adapt for -m31 build. If kernel supports what's called "highgprs"
-# feature on Linux [see /proc/cpuinfo], it's possible to use 64-bit
-# instructions and achieve "64-bit" performance even in 31-bit legacy
-# application context. The feature is not specific to any particular
-# processor, as long as it's "z-CPU". Latter implies that the code
-# remains z/Architecture specific.
-
-$kimdfunc=1; # magic function code for kimd instruction
-
-$flavour = shift;
-
-if ($flavour =~ /3[12]/) {
- $SIZE_T=4;
- $g="";
-} else {
- $SIZE_T=8;
- $g="g";
-}
-
-while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {}
-open STDOUT,">$output";
-
-$K_00_39="%r0"; $K=$K_00_39;
-$K_40_79="%r1";
-$ctx="%r2"; $prefetch="%r2";
-$inp="%r3";
-$len="%r4";
-
-$A="%r5";
-$B="%r6";
-$C="%r7";
-$D="%r8";
-$E="%r9"; @V=($A,$B,$C,$D,$E);
-$t0="%r10";
-$t1="%r11";
-@X=("%r12","%r13","%r14");
-$sp="%r15";
-
-$stdframe=16*$SIZE_T+4*8;
-$frame=$stdframe+16*4;
-
-sub Xupdate {
-my $i=shift;
-
-$code.=<<___ if ($i==15);
- lg $prefetch,$stdframe($sp) ### Xupdate(16) warm-up
- lr $X[0],$X[2]
-___
-return if ($i&1); # Xupdate is vectorized and executed every 2nd cycle
-$code.=<<___ if ($i<16);
- lg $X[0],`$i*4`($inp) ### Xload($i)
- rllg $X[1],$X[0],32
-___
-$code.=<<___ if ($i>=16);
- xgr $X[0],$prefetch ### Xupdate($i)
- lg $prefetch,`$stdframe+4*(($i+2)%16)`($sp)
- xg $X[0],`$stdframe+4*(($i+8)%16)`($sp)
- xgr $X[0],$prefetch
- rll $X[0],$X[0],1
- rllg $X[1],$X[0],32
- rll $X[1],$X[1],1
- rllg $X[0],$X[1],32
- lr $X[2],$X[1] # feedback
-___
-$code.=<<___ if ($i<=70);
- stg $X[0],`$stdframe+4*($i%16)`($sp)
-___
-unshift(@X,pop(@X));
-}
-
-sub BODY_00_19 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $xi=$X[1];
-
- &Xupdate($i);
-$code.=<<___;
- alr $e,$K ### $i
- rll $t1,$a,5
- lr $t0,$d
- xr $t0,$c
- alr $e,$t1
- nr $t0,$b
- alr $e,$xi
- xr $t0,$d
- rll $b,$b,30
- alr $e,$t0
-___
-}
-
-sub BODY_20_39 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $xi=$X[1];
-
- &Xupdate($i);
-$code.=<<___;
- alr $e,$K ### $i
- rll $t1,$a,5
- lr $t0,$b
- alr $e,$t1
- xr $t0,$c
- alr $e,$xi
- xr $t0,$d
- rll $b,$b,30
- alr $e,$t0
-___
-}
-
-sub BODY_40_59 {
-my ($i,$a,$b,$c,$d,$e)=@_;
-my $xi=$X[1];
-
- &Xupdate($i);
-$code.=<<___;
- alr $e,$K ### $i
- rll $t1,$a,5
- lr $t0,$b
- alr $e,$t1
- or $t0,$c
- lr $t1,$b
- nr $t0,$d
- nr $t1,$c
- alr $e,$xi
- or $t0,$t1
- rll $b,$b,30
- alr $e,$t0
-___
-}
-
-$code.=<<___;
-.text
-.align 64
-.type Ktable,\@object
-Ktable: .long 0x5a827999,0x6ed9eba1,0x8f1bbcdc,0xca62c1d6
- .skip 48 #.long 0,0,0,0,0,0,0,0,0,0,0,0
-.size Ktable,.-Ktable
-.globl sha1_block_data_order
-.type sha1_block_data_order,\@function
-sha1_block_data_order:
-___
-$code.=<<___ if ($kimdfunc);
- larl %r1,OPENSSL_s390xcap_P
- lg %r0,0(%r1)
- tmhl %r0,0x4000 # check for message-security assist
- jz .Lsoftware
- lghi %r0,0
- la %r1,`2*$SIZE_T`($sp)
- .long 0xb93e0002 # kimd %r0,%r2
- lg %r0,`2*$SIZE_T`($sp)
- tmhh %r0,`0x8000>>$kimdfunc`
- jz .Lsoftware
- lghi %r0,$kimdfunc
- lgr %r1,$ctx
- lgr %r2,$inp
- sllg %r3,$len,6
- .long 0xb93e0002 # kimd %r0,%r2
- brc 1,.-4 # pay attention to "partial completion"
- br %r14
-.align 16
-.Lsoftware:
-___
-$code.=<<___;
- lghi %r1,-$frame
- st${g} $ctx,`2*$SIZE_T`($sp)
- stm${g} %r6,%r15,`6*$SIZE_T`($sp)
- lgr %r0,$sp
- la $sp,0(%r1,$sp)
- st${g} %r0,0($sp)
-
- larl $t0,Ktable
- llgf $A,0($ctx)
- llgf $B,4($ctx)
- llgf $C,8($ctx)
- llgf $D,12($ctx)
- llgf $E,16($ctx)
-
- lg $K_00_39,0($t0)
- lg $K_40_79,8($t0)
-
-.Lloop:
- rllg $K_00_39,$K_00_39,32
-___
-for ($i=0;$i<20;$i++) { &BODY_00_19($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
- rllg $K_00_39,$K_00_39,32
-___
-for (;$i<40;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___; $K=$K_40_79;
- rllg $K_40_79,$K_40_79,32
-___
-for (;$i<60;$i++) { &BODY_40_59($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
- rllg $K_40_79,$K_40_79,32
-___
-for (;$i<80;$i++) { &BODY_20_39($i,@V); unshift(@V,pop(@V)); }
-$code.=<<___;
-
- l${g} $ctx,`$frame+2*$SIZE_T`($sp)
- la $inp,64($inp)
- al $A,0($ctx)
- al $B,4($ctx)
- al $C,8($ctx)
- al $D,12($ctx)
- al $E,16($ctx)
- st $A,0($ctx)
- st $B,4($ctx)
- st $C,8($ctx)
- st $D,12($ctx)
- st $E,16($ctx)
- brct${g} $len,.Lloop
-
- lm${g} %r6,%r15,`$frame+6*$SIZE_T`($sp)
- br %r14
-.size sha1_block_data_order,.-sha1_block_data_order
-.string "SHA1 block transform for s390x, CRYPTOGAMS by <appro\@openssl.org>"
-.comm OPENSSL_s390xcap_P,16,8
-___
-
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-
-print $code;
-close STDOUT;