summaryrefslogtreecommitdiff
path: root/main/openssl/crypto/modes/asm/ghash-parisc.pl
diff options
context:
space:
mode:
authorArne Schwabe <arne@rfc2549.org>2015-04-15 00:17:26 +0200
committerArne Schwabe <arne@rfc2549.org>2015-04-15 00:20:23 +0200
commitc3ae4aaac9f0b168aed063d3e86c5196608eaba1 (patch)
tree1a18e7d8751d4dd3682d82d12c8441b335112984 /main/openssl/crypto/modes/asm/ghash-parisc.pl
parent5e42114d22faefe7c272b1b498fdf5640da494c7 (diff)
Move more to git, add submodules, fix build script, change hgignore to gitignore
Diffstat (limited to 'main/openssl/crypto/modes/asm/ghash-parisc.pl')
m---------main/openssl0
-rw-r--r--main/openssl/crypto/modes/asm/ghash-parisc.pl731
2 files changed, 0 insertions, 731 deletions
diff --git a/main/openssl b/main/openssl
new file mode 160000
+Subproject 4d377a9ce111930d8a8f06dc0e94a892a7f6c51
diff --git a/main/openssl/crypto/modes/asm/ghash-parisc.pl b/main/openssl/crypto/modes/asm/ghash-parisc.pl
deleted file mode 100644
index d5ad96b4..00000000
--- a/main/openssl/crypto/modes/asm/ghash-parisc.pl
+++ /dev/null
@@ -1,731 +0,0 @@
-#!/usr/bin/env perl
-#
-# ====================================================================
-# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-#
-# April 2010
-#
-# The module implements "4-bit" GCM GHASH function and underlying
-# single multiplication operation in GF(2^128). "4-bit" means that it
-# uses 256 bytes per-key table [+128 bytes shared table]. On PA-7100LC
-# it processes one byte in 19.6 cycles, which is more than twice as
-# fast as code generated by gcc 3.2. PA-RISC 2.0 loop is scheduled for
-# 8 cycles, but measured performance on PA-8600 system is ~9 cycles per
-# processed byte. This is ~2.2x faster than 64-bit code generated by
-# vendor compiler (which used to be very hard to beat:-).
-#
-# Special thanks to polarhome.com for providing HP-UX account.
-
-$flavour = shift;
-$output = shift;
-open STDOUT,">$output";
-
-if ($flavour =~ /64/) {
- $LEVEL ="2.0W";
- $SIZE_T =8;
- $FRAME_MARKER =80;
- $SAVED_RP =16;
- $PUSH ="std";
- $PUSHMA ="std,ma";
- $POP ="ldd";
- $POPMB ="ldd,mb";
- $NREGS =6;
-} else {
- $LEVEL ="1.0"; #"\n\t.ALLOW\t2.0";
- $SIZE_T =4;
- $FRAME_MARKER =48;
- $SAVED_RP =20;
- $PUSH ="stw";
- $PUSHMA ="stwm";
- $POP ="ldw";
- $POPMB ="ldwm";
- $NREGS =11;
-}
-
-$FRAME=10*$SIZE_T+$FRAME_MARKER;# NREGS saved regs + frame marker
- # [+ argument transfer]
-
-################# volatile registers
-$Xi="%r26"; # argument block
-$Htbl="%r25";
-$inp="%r24";
-$len="%r23";
-$Hhh=$Htbl; # variables
-$Hll="%r22";
-$Zhh="%r21";
-$Zll="%r20";
-$cnt="%r19";
-$rem_4bit="%r28";
-$rem="%r29";
-$mask0xf0="%r31";
-
-################# preserved registers
-$Thh="%r1";
-$Tll="%r2";
-$nlo="%r3";
-$nhi="%r4";
-$byte="%r5";
-if ($SIZE_T==4) {
- $Zhl="%r6";
- $Zlh="%r7";
- $Hhl="%r8";
- $Hlh="%r9";
- $Thl="%r10";
- $Tlh="%r11";
-}
-$rem2="%r6"; # used in PA-RISC 2.0 code
-
-$code.=<<___;
- .LEVEL $LEVEL
- .SPACE \$TEXT\$
- .SUBSPA \$CODE\$,QUAD=0,ALIGN=8,ACCESS=0x2C,CODE_ONLY
-
- .EXPORT gcm_gmult_4bit,ENTRY,ARGW0=GR,ARGW1=GR
- .ALIGN 64
-gcm_gmult_4bit
- .PROC
- .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=$NREGS
- .ENTRY
- $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
- $PUSHMA %r3,$FRAME(%sp)
- $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
- $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
- $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
-___
-$code.=<<___ if ($SIZE_T==4);
- $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
- $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
- $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
- $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
- $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
-___
-$code.=<<___;
- blr %r0,$rem_4bit
- ldi 3,$rem
-L\$pic_gmult
- andcm $rem_4bit,$rem,$rem_4bit
- addl $inp,$len,$len
- ldo L\$rem_4bit-L\$pic_gmult($rem_4bit),$rem_4bit
- ldi 0xf0,$mask0xf0
-___
-$code.=<<___ if ($SIZE_T==4);
- ldi 31,$rem
- mtctl $rem,%cr11
- extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
- b L\$parisc1_gmult
- nop
-___
-
-$code.=<<___;
- ldb 15($Xi),$nlo
- ldo 8($Htbl),$Hll
-
- and $mask0xf0,$nlo,$nhi
- depd,z $nlo,59,4,$nlo
-
- ldd $nlo($Hll),$Zll
- ldd $nlo($Hhh),$Zhh
-
- depd,z $Zll,60,4,$rem
- shrpd $Zhh,$Zll,4,$Zll
- extrd,u $Zhh,59,60,$Zhh
- ldb 14($Xi),$nlo
-
- ldd $nhi($Hll),$Tll
- ldd $nhi($Hhh),$Thh
- and $mask0xf0,$nlo,$nhi
- depd,z $nlo,59,4,$nlo
-
- xor $Tll,$Zll,$Zll
- xor $Thh,$Zhh,$Zhh
- ldd $rem($rem_4bit),$rem
- b L\$oop_gmult_pa2
- ldi 13,$cnt
-
- .ALIGN 8
-L\$oop_gmult_pa2
- xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
- depd,z $Zll,60,4,$rem
-
- shrpd $Zhh,$Zll,4,$Zll
- extrd,u $Zhh,59,60,$Zhh
- ldd $nlo($Hll),$Tll
- ldd $nlo($Hhh),$Thh
-
- xor $Tll,$Zll,$Zll
- xor $Thh,$Zhh,$Zhh
- ldd $rem($rem_4bit),$rem
-
- xor $rem,$Zhh,$Zhh
- depd,z $Zll,60,4,$rem
- ldbx $cnt($Xi),$nlo
-
- shrpd $Zhh,$Zll,4,$Zll
- extrd,u $Zhh,59,60,$Zhh
- ldd $nhi($Hll),$Tll
- ldd $nhi($Hhh),$Thh
-
- and $mask0xf0,$nlo,$nhi
- depd,z $nlo,59,4,$nlo
- ldd $rem($rem_4bit),$rem
-
- xor $Tll,$Zll,$Zll
- addib,uv -1,$cnt,L\$oop_gmult_pa2
- xor $Thh,$Zhh,$Zhh
-
- xor $rem,$Zhh,$Zhh
- depd,z $Zll,60,4,$rem
-
- shrpd $Zhh,$Zll,4,$Zll
- extrd,u $Zhh,59,60,$Zhh
- ldd $nlo($Hll),$Tll
- ldd $nlo($Hhh),$Thh
-
- xor $Tll,$Zll,$Zll
- xor $Thh,$Zhh,$Zhh
- ldd $rem($rem_4bit),$rem
-
- xor $rem,$Zhh,$Zhh
- depd,z $Zll,60,4,$rem
-
- shrpd $Zhh,$Zll,4,$Zll
- extrd,u $Zhh,59,60,$Zhh
- ldd $nhi($Hll),$Tll
- ldd $nhi($Hhh),$Thh
-
- xor $Tll,$Zll,$Zll
- xor $Thh,$Zhh,$Zhh
- ldd $rem($rem_4bit),$rem
-
- xor $rem,$Zhh,$Zhh
- std $Zll,8($Xi)
- std $Zhh,0($Xi)
-___
-
-$code.=<<___ if ($SIZE_T==4);
- b L\$done_gmult
- nop
-
-L\$parisc1_gmult
- ldb 15($Xi),$nlo
- ldo 12($Htbl),$Hll
- ldo 8($Htbl),$Hlh
- ldo 4($Htbl),$Hhl
-
- and $mask0xf0,$nlo,$nhi
- zdep $nlo,27,4,$nlo
-
- ldwx $nlo($Hll),$Zll
- ldwx $nlo($Hlh),$Zlh
- ldwx $nlo($Hhl),$Zhl
- ldwx $nlo($Hhh),$Zhh
- zdep $Zll,28,4,$rem
- ldb 14($Xi),$nlo
- ldwx $rem($rem_4bit),$rem
- shrpw $Zlh,$Zll,4,$Zll
- ldwx $nhi($Hll),$Tll
- shrpw $Zhl,$Zlh,4,$Zlh
- ldwx $nhi($Hlh),$Tlh
- shrpw $Zhh,$Zhl,4,$Zhl
- ldwx $nhi($Hhl),$Thl
- extru $Zhh,27,28,$Zhh
- ldwx $nhi($Hhh),$Thh
- xor $rem,$Zhh,$Zhh
- and $mask0xf0,$nlo,$nhi
- zdep $nlo,27,4,$nlo
-
- xor $Tll,$Zll,$Zll
- ldwx $nlo($Hll),$Tll
- xor $Tlh,$Zlh,$Zlh
- ldwx $nlo($Hlh),$Tlh
- xor $Thl,$Zhl,$Zhl
- b L\$oop_gmult_pa1
- ldi 13,$cnt
-
- .ALIGN 8
-L\$oop_gmult_pa1
- zdep $Zll,28,4,$rem
- ldwx $nlo($Hhl),$Thl
- xor $Thh,$Zhh,$Zhh
- ldwx $rem($rem_4bit),$rem
- shrpw $Zlh,$Zll,4,$Zll
- ldwx $nlo($Hhh),$Thh
- shrpw $Zhl,$Zlh,4,$Zlh
- ldbx $cnt($Xi),$nlo
- xor $Tll,$Zll,$Zll
- ldwx $nhi($Hll),$Tll
- shrpw $Zhh,$Zhl,4,$Zhl
- xor $Tlh,$Zlh,$Zlh
- ldwx $nhi($Hlh),$Tlh
- extru $Zhh,27,28,$Zhh
- xor $Thl,$Zhl,$Zhl
- ldwx $nhi($Hhl),$Thl
- xor $rem,$Zhh,$Zhh
- zdep $Zll,28,4,$rem
- xor $Thh,$Zhh,$Zhh
- ldwx $nhi($Hhh),$Thh
- shrpw $Zlh,$Zll,4,$Zll
- ldwx $rem($rem_4bit),$rem
- shrpw $Zhl,$Zlh,4,$Zlh
- shrpw $Zhh,$Zhl,4,$Zhl
- and $mask0xf0,$nlo,$nhi
- extru $Zhh,27,28,$Zhh
- zdep $nlo,27,4,$nlo
- xor $Tll,$Zll,$Zll
- ldwx $nlo($Hll),$Tll
- xor $Tlh,$Zlh,$Zlh
- ldwx $nlo($Hlh),$Tlh
- xor $rem,$Zhh,$Zhh
- addib,uv -1,$cnt,L\$oop_gmult_pa1
- xor $Thl,$Zhl,$Zhl
-
- zdep $Zll,28,4,$rem
- ldwx $nlo($Hhl),$Thl
- xor $Thh,$Zhh,$Zhh
- ldwx $rem($rem_4bit),$rem
- shrpw $Zlh,$Zll,4,$Zll
- ldwx $nlo($Hhh),$Thh
- shrpw $Zhl,$Zlh,4,$Zlh
- xor $Tll,$Zll,$Zll
- ldwx $nhi($Hll),$Tll
- shrpw $Zhh,$Zhl,4,$Zhl
- xor $Tlh,$Zlh,$Zlh
- ldwx $nhi($Hlh),$Tlh
- extru $Zhh,27,28,$Zhh
- xor $rem,$Zhh,$Zhh
- xor $Thl,$Zhl,$Zhl
- ldwx $nhi($Hhl),$Thl
- xor $Thh,$Zhh,$Zhh
- ldwx $nhi($Hhh),$Thh
- zdep $Zll,28,4,$rem
- ldwx $rem($rem_4bit),$rem
- shrpw $Zlh,$Zll,4,$Zll
- shrpw $Zhl,$Zlh,4,$Zlh
- shrpw $Zhh,$Zhl,4,$Zhl
- extru $Zhh,27,28,$Zhh
- xor $Tll,$Zll,$Zll
- xor $Tlh,$Zlh,$Zlh
- xor $rem,$Zhh,$Zhh
- stw $Zll,12($Xi)
- xor $Thl,$Zhl,$Zhl
- stw $Zlh,8($Xi)
- xor $Thh,$Zhh,$Zhh
- stw $Zhl,4($Xi)
- stw $Zhh,0($Xi)
-___
-$code.=<<___;
-L\$done_gmult
- $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
- $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
- $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
- $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
-___
-$code.=<<___ if ($SIZE_T==4);
- $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
- $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
- $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
- $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
- $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
-___
-$code.=<<___;
- bv (%r2)
- .EXIT
- $POPMB -$FRAME(%sp),%r3
- .PROCEND
-
- .EXPORT gcm_ghash_4bit,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
- .ALIGN 64
-gcm_ghash_4bit
- .PROC
- .CALLINFO FRAME=`$FRAME-10*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=11
- .ENTRY
- $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
- $PUSHMA %r3,$FRAME(%sp)
- $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
- $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
- $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
-___
-$code.=<<___ if ($SIZE_T==4);
- $PUSH %r7,`-$FRAME+4*$SIZE_T`(%sp)
- $PUSH %r8,`-$FRAME+5*$SIZE_T`(%sp)
- $PUSH %r9,`-$FRAME+6*$SIZE_T`(%sp)
- $PUSH %r10,`-$FRAME+7*$SIZE_T`(%sp)
- $PUSH %r11,`-$FRAME+8*$SIZE_T`(%sp)
-___
-$code.=<<___;
- blr %r0,$rem_4bit
- ldi 3,$rem
-L\$pic_ghash
- andcm $rem_4bit,$rem,$rem_4bit
- addl $inp,$len,$len
- ldo L\$rem_4bit-L\$pic_ghash($rem_4bit),$rem_4bit
- ldi 0xf0,$mask0xf0
-___
-$code.=<<___ if ($SIZE_T==4);
- ldi 31,$rem
- mtctl $rem,%cr11
- extrd,u,*= $rem,%sar,1,$rem ; executes on PA-RISC 1.0
- b L\$parisc1_ghash
- nop
-___
-
-$code.=<<___;
- ldb 15($Xi),$nlo
- ldo 8($Htbl),$Hll
-
-L\$outer_ghash_pa2
- ldb 15($inp),$nhi
- xor $nhi,$nlo,$nlo
- and $mask0xf0,$nlo,$nhi
- depd,z $nlo,59,4,$nlo
-
- ldd $nlo($Hll),$Zll
- ldd $nlo($Hhh),$Zhh
-
- depd,z $Zll,60,4,$rem
- shrpd $Zhh,$Zll,4,$Zll
- extrd,u $Zhh,59,60,$Zhh
- ldb 14($Xi),$nlo
- ldb 14($inp),$byte
-
- ldd $nhi($Hll),$Tll
- ldd $nhi($Hhh),$Thh
- xor $byte,$nlo,$nlo
- and $mask0xf0,$nlo,$nhi
- depd,z $nlo,59,4,$nlo
-
- xor $Tll,$Zll,$Zll
- xor $Thh,$Zhh,$Zhh
- ldd $rem($rem_4bit),$rem
- b L\$oop_ghash_pa2
- ldi 13,$cnt
-
- .ALIGN 8
-L\$oop_ghash_pa2
- xor $rem,$Zhh,$Zhh ; moved here to work around gas bug
- depd,z $Zll,60,4,$rem2
-
- shrpd $Zhh,$Zll,4,$Zll
- extrd,u $Zhh,59,60,$Zhh
- ldd $nlo($Hll),$Tll
- ldd $nlo($Hhh),$Thh
-
- xor $Tll,$Zll,$Zll
- xor $Thh,$Zhh,$Zhh
- ldbx $cnt($Xi),$nlo
- ldbx $cnt($inp),$byte
-
- depd,z $Zll,60,4,$rem
- shrpd $Zhh,$Zll,4,$Zll
- ldd $rem2($rem_4bit),$rem2
-
- xor $rem2,$Zhh,$Zhh
- xor $byte,$nlo,$nlo
- ldd $nhi($Hll),$Tll
- ldd $nhi($Hhh),$Thh
-
- and $mask0xf0,$nlo,$nhi
- depd,z $nlo,59,4,$nlo
-
- extrd,u $Zhh,59,60,$Zhh
- xor $Tll,$Zll,$Zll
-
- ldd $rem($rem_4bit),$rem
- addib,uv -1,$cnt,L\$oop_ghash_pa2
- xor $Thh,$Zhh,$Zhh
-
- xor $rem,$Zhh,$Zhh
- depd,z $Zll,60,4,$rem2
-
- shrpd $Zhh,$Zll,4,$Zll
- extrd,u $Zhh,59,60,$Zhh
- ldd $nlo($Hll),$Tll
- ldd $nlo($Hhh),$Thh
-
- xor $Tll,$Zll,$Zll
- xor $Thh,$Zhh,$Zhh
-
- depd,z $Zll,60,4,$rem
- shrpd $Zhh,$Zll,4,$Zll
- ldd $rem2($rem_4bit),$rem2
-
- xor $rem2,$Zhh,$Zhh
- ldd $nhi($Hll),$Tll
- ldd $nhi($Hhh),$Thh
-
- extrd,u $Zhh,59,60,$Zhh
- xor $Tll,$Zll,$Zll
- xor $Thh,$Zhh,$Zhh
- ldd $rem($rem_4bit),$rem
-
- xor $rem,$Zhh,$Zhh
- std $Zll,8($Xi)
- ldo 16($inp),$inp
- std $Zhh,0($Xi)
- cmpb,*<> $inp,$len,L\$outer_ghash_pa2
- copy $Zll,$nlo
-___
-
-$code.=<<___ if ($SIZE_T==4);
- b L\$done_ghash
- nop
-
-L\$parisc1_ghash
- ldb 15($Xi),$nlo
- ldo 12($Htbl),$Hll
- ldo 8($Htbl),$Hlh
- ldo 4($Htbl),$Hhl
-
-L\$outer_ghash_pa1
- ldb 15($inp),$byte
- xor $byte,$nlo,$nlo
- and $mask0xf0,$nlo,$nhi
- zdep $nlo,27,4,$nlo
-
- ldwx $nlo($Hll),$Zll
- ldwx $nlo($Hlh),$Zlh
- ldwx $nlo($Hhl),$Zhl
- ldwx $nlo($Hhh),$Zhh
- zdep $Zll,28,4,$rem
- ldb 14($Xi),$nlo
- ldb 14($inp),$byte
- ldwx $rem($rem_4bit),$rem
- shrpw $Zlh,$Zll,4,$Zll
- ldwx $nhi($Hll),$Tll
- shrpw $Zhl,$Zlh,4,$Zlh
- ldwx $nhi($Hlh),$Tlh
- shrpw $Zhh,$Zhl,4,$Zhl
- ldwx $nhi($Hhl),$Thl
- extru $Zhh,27,28,$Zhh
- ldwx $nhi($Hhh),$Thh
- xor $byte,$nlo,$nlo
- xor $rem,$Zhh,$Zhh
- and $mask0xf0,$nlo,$nhi
- zdep $nlo,27,4,$nlo
-
- xor $Tll,$Zll,$Zll
- ldwx $nlo($Hll),$Tll
- xor $Tlh,$Zlh,$Zlh
- ldwx $nlo($Hlh),$Tlh
- xor $Thl,$Zhl,$Zhl
- b L\$oop_ghash_pa1
- ldi 13,$cnt
-
- .ALIGN 8
-L\$oop_ghash_pa1
- zdep $Zll,28,4,$rem
- ldwx $nlo($Hhl),$Thl
- xor $Thh,$Zhh,$Zhh
- ldwx $rem($rem_4bit),$rem
- shrpw $Zlh,$Zll,4,$Zll
- ldwx $nlo($Hhh),$Thh
- shrpw $Zhl,$Zlh,4,$Zlh
- ldbx $cnt($Xi),$nlo
- xor $Tll,$Zll,$Zll
- ldwx $nhi($Hll),$Tll
- shrpw $Zhh,$Zhl,4,$Zhl
- ldbx $cnt($inp),$byte
- xor $Tlh,$Zlh,$Zlh
- ldwx $nhi($Hlh),$Tlh
- extru $Zhh,27,28,$Zhh
- xor $Thl,$Zhl,$Zhl
- ldwx $nhi($Hhl),$Thl
- xor $rem,$Zhh,$Zhh
- zdep $Zll,28,4,$rem
- xor $Thh,$Zhh,$Zhh
- ldwx $nhi($Hhh),$Thh
- shrpw $Zlh,$Zll,4,$Zll
- ldwx $rem($rem_4bit),$rem
- shrpw $Zhl,$Zlh,4,$Zlh
- xor $byte,$nlo,$nlo
- shrpw $Zhh,$Zhl,4,$Zhl
- and $mask0xf0,$nlo,$nhi
- extru $Zhh,27,28,$Zhh
- zdep $nlo,27,4,$nlo
- xor $Tll,$Zll,$Zll
- ldwx $nlo($Hll),$Tll
- xor $Tlh,$Zlh,$Zlh
- ldwx $nlo($Hlh),$Tlh
- xor $rem,$Zhh,$Zhh
- addib,uv -1,$cnt,L\$oop_ghash_pa1
- xor $Thl,$Zhl,$Zhl
-
- zdep $Zll,28,4,$rem
- ldwx $nlo($Hhl),$Thl
- xor $Thh,$Zhh,$Zhh
- ldwx $rem($rem_4bit),$rem
- shrpw $Zlh,$Zll,4,$Zll
- ldwx $nlo($Hhh),$Thh
- shrpw $Zhl,$Zlh,4,$Zlh
- xor $Tll,$Zll,$Zll
- ldwx $nhi($Hll),$Tll
- shrpw $Zhh,$Zhl,4,$Zhl
- xor $Tlh,$Zlh,$Zlh
- ldwx $nhi($Hlh),$Tlh
- extru $Zhh,27,28,$Zhh
- xor $rem,$Zhh,$Zhh
- xor $Thl,$Zhl,$Zhl
- ldwx $nhi($Hhl),$Thl
- xor $Thh,$Zhh,$Zhh
- ldwx $nhi($Hhh),$Thh
- zdep $Zll,28,4,$rem
- ldwx $rem($rem_4bit),$rem
- shrpw $Zlh,$Zll,4,$Zll
- shrpw $Zhl,$Zlh,4,$Zlh
- shrpw $Zhh,$Zhl,4,$Zhl
- extru $Zhh,27,28,$Zhh
- xor $Tll,$Zll,$Zll
- xor $Tlh,$Zlh,$Zlh
- xor $rem,$Zhh,$Zhh
- stw $Zll,12($Xi)
- xor $Thl,$Zhl,$Zhl
- stw $Zlh,8($Xi)
- xor $Thh,$Zhh,$Zhh
- stw $Zhl,4($Xi)
- ldo 16($inp),$inp
- stw $Zhh,0($Xi)
- comb,<> $inp,$len,L\$outer_ghash_pa1
- copy $Zll,$nlo
-___
-$code.=<<___;
-L\$done_ghash
- $POP `-$FRAME-$SAVED_RP`(%sp),%r2 ; standard epilogue
- $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
- $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
- $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
-___
-$code.=<<___ if ($SIZE_T==4);
- $POP `-$FRAME+4*$SIZE_T`(%sp),%r7
- $POP `-$FRAME+5*$SIZE_T`(%sp),%r8
- $POP `-$FRAME+6*$SIZE_T`(%sp),%r9
- $POP `-$FRAME+7*$SIZE_T`(%sp),%r10
- $POP `-$FRAME+8*$SIZE_T`(%sp),%r11
-___
-$code.=<<___;
- bv (%r2)
- .EXIT
- $POPMB -$FRAME(%sp),%r3
- .PROCEND
-
- .ALIGN 64
-L\$rem_4bit
- .WORD `0x0000<<16`,0,`0x1C20<<16`,0,`0x3840<<16`,0,`0x2460<<16`,0
- .WORD `0x7080<<16`,0,`0x6CA0<<16`,0,`0x48C0<<16`,0,`0x54E0<<16`,0
- .WORD `0xE100<<16`,0,`0xFD20<<16`,0,`0xD940<<16`,0,`0xC560<<16`,0
- .WORD `0x9180<<16`,0,`0x8DA0<<16`,0,`0xA9C0<<16`,0,`0xB5E0<<16`,0
- .STRINGZ "GHASH for PA-RISC, GRYPTOGAMS by <appro\@openssl.org>"
- .ALIGN 64
-___
-
-# Explicitly encode PA-RISC 2.0 instructions used in this module, so
-# that it can be compiled with .LEVEL 1.0. It should be noted that I
-# wouldn't have to do this, if GNU assembler understood .ALLOW 2.0
-# directive...
-
-my $ldd = sub {
- my ($mod,$args) = @_;
- my $orig = "ldd$mod\t$args";
-
- if ($args =~ /%r([0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 4
- { my $opcode=(0x03<<26)|($2<<21)|($1<<16)|(3<<6)|$3;
- sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
- }
- elsif ($args =~ /(\-?[0-9]+)\(%r([0-9]+)\),%r([0-9]+)/) # format 5
- { my $opcode=(0x03<<26)|($2<<21)|(1<<12)|(3<<6)|$3;
- $opcode|=(($1&0xF)<<17)|(($1&0x10)<<12); # encode offset
- $opcode|=(1<<5) if ($mod =~ /^,m/);
- $opcode|=(1<<13) if ($mod =~ /^,mb/);
- sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
- }
- else { "\t".$orig; }
-};
-
-my $std = sub {
- my ($mod,$args) = @_;
- my $orig = "std$mod\t$args";
-
- if ($args =~ /%r([0-9]+),(\-?[0-9]+)\(%r([0-9]+)\)/) # format 3 suffices
- { my $opcode=(0x1c<<26)|($3<<21)|($1<<16)|(($2&0x1FF8)<<1)|(($2>>13)&1);
- sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
- }
- else { "\t".$orig; }
-};
-
-my $extrd = sub {
- my ($mod,$args) = @_;
- my $orig = "extrd$mod\t$args";
-
- # I only have ",u" completer, it's implicitly encoded...
- if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 15
- { my $opcode=(0x36<<26)|($1<<21)|($4<<16);
- my $len=32-$3;
- $opcode |= (($2&0x20)<<6)|(($2&0x1f)<<5); # encode pos
- $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len
- sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
- }
- elsif ($args =~ /%r([0-9]+),%sar,([0-9]+),%r([0-9]+)/) # format 12
- { my $opcode=(0x34<<26)|($1<<21)|($3<<16)|(2<<11)|(1<<9);
- my $len=32-$2;
- $opcode |= (($len&0x20)<<3)|($len&0x1f); # encode len
- $opcode |= (1<<13) if ($mod =~ /,\**=/);
- sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
- }
- else { "\t".$orig; }
-};
-
-my $shrpd = sub {
- my ($mod,$args) = @_;
- my $orig = "shrpd$mod\t$args";
-
- if ($args =~ /%r([0-9]+),%r([0-9]+),([0-9]+),%r([0-9]+)/) # format 14
- { my $opcode=(0x34<<26)|($2<<21)|($1<<16)|(1<<10)|$4;
- my $cpos=63-$3;
- $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode sa
- sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
- }
- elsif ($args =~ /%r([0-9]+),%r([0-9]+),%sar,%r([0-9]+)/) # format 11
- { sprintf "\t.WORD\t0x%08x\t; %s",
- (0x34<<26)|($2<<21)|($1<<16)|(1<<9)|$3,$orig;
- }
- else { "\t".$orig; }
-};
-
-my $depd = sub {
- my ($mod,$args) = @_;
- my $orig = "depd$mod\t$args";
-
- # I only have ",z" completer, it's impicitly encoded...
- if ($args =~ /%r([0-9]+),([0-9]+),([0-9]+),%r([0-9]+)/) # format 16
- { my $opcode=(0x3c<<26)|($4<<21)|($1<<16);
- my $cpos=63-$2;
- my $len=32-$3;
- $opcode |= (($cpos&0x20)<<6)|(($cpos&0x1f)<<5); # encode pos
- $opcode |= (($len&0x20)<<7)|($len&0x1f); # encode len
- sprintf "\t.WORD\t0x%08x\t; %s",$opcode,$orig;
- }
- else { "\t".$orig; }
-};
-
-sub assemble {
- my ($mnemonic,$mod,$args)=@_;
- my $opcode = eval("\$$mnemonic");
-
- ref($opcode) eq 'CODE' ? &$opcode($mod,$args) : "\t$mnemonic$mod\t$args";
-}
-
-foreach (split("\n",$code)) {
- s/\`([^\`]*)\`/eval $1/ge;
- if ($SIZE_T==4) {
- s/^\s+([a-z]+)([\S]*)\s+([\S]*)/&assemble($1,$2,$3)/e;
- s/cmpb,\*/comb,/;
- s/,\*/,/;
- }
- s/\bbv\b/bve/ if ($SIZE_T==8);
- print $_,"\n";
-}
-
-close STDOUT;