diff options
Diffstat (limited to 'main/openssl/crypto/x86cpuid.pl')
| -rw-r--r-- | main/openssl/crypto/x86cpuid.pl | 312 | 
1 files changed, 312 insertions, 0 deletions
| diff --git a/main/openssl/crypto/x86cpuid.pl b/main/openssl/crypto/x86cpuid.pl new file mode 100644 index 00000000..a7464af1 --- /dev/null +++ b/main/openssl/crypto/x86cpuid.pl @@ -0,0 +1,312 @@ +#!/usr/bin/env perl + +$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1; +push(@INC, "${dir}perlasm", "perlasm"); +require "x86asm.pl"; + +&asm_init($ARGV[0],"x86cpuid"); + +for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } + +&function_begin("OPENSSL_ia32_cpuid"); +	&xor	("edx","edx"); +	&pushf	(); +	&pop	("eax"); +	&mov	("ecx","eax"); +	&xor	("eax",1<<21); +	&push	("eax"); +	&popf	(); +	&pushf	(); +	&pop	("eax"); +	&xor	("ecx","eax"); +	&bt	("ecx",21); +	&jnc	(&label("done")); +	&xor	("eax","eax"); +	&cpuid	(); +	&mov	("edi","eax");		# max value for standard query level + +	&xor	("eax","eax"); +	&cmp	("ebx",0x756e6547);	# "Genu" +	&setne	(&LB("eax")); +	&mov	("ebp","eax"); +	&cmp	("edx",0x49656e69);	# "ineI" +	&setne	(&LB("eax")); +	&or	("ebp","eax"); +	&cmp	("ecx",0x6c65746e);	# "ntel" +	&setne	(&LB("eax")); +	&or	("ebp","eax");		# 0 indicates Intel CPU +	&jz	(&label("intel")); + +	&cmp	("ebx",0x68747541);	# "Auth" +	&setne	(&LB("eax")); +	&mov	("esi","eax"); +	&cmp	("edx",0x69746E65);	# "enti" +	&setne	(&LB("eax")); +	&or	("esi","eax"); +	&cmp	("ecx",0x444D4163);	# "cAMD" +	&setne	(&LB("eax")); +	&or	("esi","eax");		# 0 indicates AMD CPU +	&jnz	(&label("intel")); + +	# AMD specific +	&mov	("eax",0x80000000); +	&cpuid	(); +	&cmp	("eax",0x80000008); +	&jb	(&label("intel")); + +	&mov	("eax",0x80000008); +	&cpuid	(); +	&movz	("esi",&LB("ecx"));	# number of cores - 1 +	&inc	("esi");		# number of cores + +	&mov	("eax",1); +	&cpuid	(); +	&bt	("edx",28); +	&jnc	(&label("done")); +	&shr	("ebx",16); +	&and	("ebx",0xff); +	&cmp	("ebx","esi"); +	&ja	(&label("done")); +	&and	("edx",0xefffffff);	# clear hyper-threading bit +	&jmp	(&label("done")); +	 +&set_label("intel"); +	&cmp	("edi",4); +	&mov	("edi",-1); +	&jb	(&label("nocacheinfo")); + +	&mov	("eax",4); +	&mov	("ecx",0);		# query L1D +	&cpuid	(); +	&mov	("edi","eax"); +	&shr	("edi",14); +	&and	("edi",0xfff);		# number of cores -1 per L1D + +&set_label("nocacheinfo"); +	&mov	("eax",1); +	&cpuid	(); +	&cmp	("ebp",0); +	&jne	(&label("notP4")); +	&and	(&HB("eax"),15);	# familiy ID +	&cmp	(&HB("eax"),15);	# P4? +	&jne	(&label("notP4")); +	&or	("edx",1<<20);		# use reserved bit to engage RC4_CHAR +&set_label("notP4"); +	&bt	("edx",28);		# test hyper-threading bit +	&jnc	(&label("done")); +	&and	("edx",0xefffffff); +	&cmp	("edi",0); +	&je	(&label("done")); + +	&or	("edx",0x10000000); +	&shr	("ebx",16); +	&cmp	(&LB("ebx"),1); +	&ja	(&label("done")); +	&and	("edx",0xefffffff);	# clear hyper-threading bit if not +&set_label("done"); +	&mov	("eax","edx"); +	&mov	("edx","ecx"); +&function_end("OPENSSL_ia32_cpuid"); + +&external_label("OPENSSL_ia32cap_P"); + +&function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD"); +	&xor	("eax","eax"); +	&xor	("edx","edx"); +	&picmeup("ecx","OPENSSL_ia32cap_P"); +	&bt	(&DWP(0,"ecx"),4); +	&jnc	(&label("notsc")); +	&rdtsc	(); +&set_label("notsc"); +	&ret	(); +&function_end_B("OPENSSL_rdtsc"); + +# This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host], +# but it's safe to call it on any [supported] 32-bit platform... +# Just check for [non-]zero return value... +&function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD"); +	&picmeup("ecx","OPENSSL_ia32cap_P"); +	&bt	(&DWP(0,"ecx"),4); +	&jnc	(&label("nohalt"));	# no TSC + +	&data_word(0x9058900e);		# push %cs; pop %eax +	&and	("eax",3); +	&jnz	(&label("nohalt"));	# not enough privileges + +	&pushf	(); +	&pop	("eax") +	&bt	("eax",9); +	&jnc	(&label("nohalt"));	# interrupts are disabled + +	&rdtsc	(); +	&push	("edx"); +	&push	("eax"); +	&halt	(); +	&rdtsc	(); + +	&sub	("eax",&DWP(0,"esp")); +	&sbb	("edx",&DWP(4,"esp")); +	&add	("esp",8); +	&ret	(); + +&set_label("nohalt"); +	&xor	("eax","eax"); +	&xor	("edx","edx"); +	&ret	(); +&function_end_B("OPENSSL_instrument_halt"); + +# Essentially there is only one use for this function. Under DJGPP: +# +#	#include <go32.h> +#	... +#	i=OPENSSL_far_spin(_dos_ds,0x46c); +#	... +# to obtain the number of spins till closest timer interrupt. + +&function_begin_B("OPENSSL_far_spin"); +	&pushf	(); +	&pop	("eax") +	&bt	("eax",9); +	&jnc	(&label("nospin"));	# interrupts are disabled + +	&mov	("eax",&DWP(4,"esp")); +	&mov	("ecx",&DWP(8,"esp")); +	&data_word (0x90d88e1e);	# push %ds, mov %eax,%ds +	&xor	("eax","eax"); +	&mov	("edx",&DWP(0,"ecx")); +	&jmp	(&label("spin")); + +	&align	(16); +&set_label("spin"); +	&inc	("eax"); +	&cmp	("edx",&DWP(0,"ecx")); +	&je	(&label("spin")); + +	&data_word (0x1f909090);	# pop	%ds +	&ret	(); + +&set_label("nospin"); +	&xor	("eax","eax"); +	&xor	("edx","edx"); +	&ret	(); +&function_end_B("OPENSSL_far_spin"); + +&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD"); +	&xor	("eax","eax"); +	&xor	("edx","edx"); +	&picmeup("ecx","OPENSSL_ia32cap_P"); +	&mov	("ecx",&DWP(0,"ecx")); +	&bt	(&DWP(0,"ecx"),1); +	&jnc	(&label("no_x87")); +	if ($sse2) { +		&bt	(&DWP(0,"ecx"),26); +		&jnc	(&label("no_sse2")); +		&pxor	("xmm0","xmm0"); +		&pxor	("xmm1","xmm1"); +		&pxor	("xmm2","xmm2"); +		&pxor	("xmm3","xmm3"); +		&pxor	("xmm4","xmm4"); +		&pxor	("xmm5","xmm5"); +		&pxor	("xmm6","xmm6"); +		&pxor	("xmm7","xmm7"); +	&set_label("no_sse2"); +	} +	# just a bunch of fldz to zap the fp/mm bank followed by finit... +	&data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b); +&set_label("no_x87"); +	&lea	("eax",&DWP(4,"esp")); +	&ret	(); +&function_end_B("OPENSSL_wipe_cpu"); + +&function_begin_B("OPENSSL_atomic_add"); +	&mov	("edx",&DWP(4,"esp"));	# fetch the pointer, 1st arg +	&mov	("ecx",&DWP(8,"esp"));	# fetch the increment, 2nd arg +	&push	("ebx"); +	&nop	(); +	&mov	("eax",&DWP(0,"edx")); +&set_label("spin"); +	&lea	("ebx",&DWP(0,"eax","ecx")); +	&nop	(); +	&data_word(0x1ab10ff0);	# lock;	cmpxchg	%ebx,(%edx)	# %eax is envolved and is always reloaded +	&jne	(&label("spin")); +	&mov	("eax","ebx");	# OpenSSL expects the new value +	&pop	("ebx"); +	&ret	(); +&function_end_B("OPENSSL_atomic_add"); + +# This function can become handy under Win32 in situations when +# we don't know which calling convention, __stdcall or __cdecl(*), +# indirect callee is using. In C it can be deployed as +# +#ifdef OPENSSL_CPUID_OBJ +#	type OPENSSL_indirect_call(void *f,...); +#	... +#	OPENSSL_indirect_call(func,[up to $max arguments]); +#endif +# +# (*)	it's designed to work even for __fastcall if number of +#	arguments is 1 or 2! +&function_begin_B("OPENSSL_indirect_call"); +	{ +	my $i,$max=7;		# $max has to be chosen as 4*n-1 +				# in order to preserve eventual +				# stack alignment +	&push	("ebp"); +	&mov	("ebp","esp"); +	&sub	("esp",$max*4); +	&mov	("ecx",&DWP(12,"ebp")); +	&mov	(&DWP(0,"esp"),"ecx"); +	&mov	("edx",&DWP(16,"ebp")); +	&mov	(&DWP(4,"esp"),"edx"); +	for($i=2;$i<$max;$i++) +		{ +		# Some copies will be redundant/bogus... +		&mov	("eax",&DWP(12+$i*4,"ebp")); +		&mov	(&DWP(0+$i*4,"esp"),"eax"); +		} +	&call_ptr	(&DWP(8,"ebp"));# make the call... +	&mov	("esp","ebp");	# ... and just restore the stack pointer +				# without paying attention to what we called, +				# (__cdecl *func) or (__stdcall *one). +	&pop	("ebp"); +	&ret	(); +	} +&function_end_B("OPENSSL_indirect_call"); + +&function_begin_B("OPENSSL_cleanse"); +	&mov	("edx",&wparam(0)); +	&mov	("ecx",&wparam(1)); +	&xor	("eax","eax"); +	&cmp	("ecx",7); +	&jae	(&label("lot")); +	&cmp	("ecx",0); +	&je	(&label("ret")); +&set_label("little"); +	&mov	(&BP(0,"edx"),"al"); +	&sub	("ecx",1); +	&lea	("edx",&DWP(1,"edx")); +	&jnz	(&label("little")); +&set_label("ret"); +	&ret	(); + +&set_label("lot",16); +	&test	("edx",3); +	&jz	(&label("aligned")); +	&mov	(&BP(0,"edx"),"al"); +	&lea	("ecx",&DWP(-1,"ecx")); +	&lea	("edx",&DWP(1,"edx")); +	&jmp	(&label("lot")); +&set_label("aligned"); +	&mov	(&DWP(0,"edx"),"eax"); +	&lea	("ecx",&DWP(-4,"ecx")); +	&test	("ecx",-4); +	&lea	("edx",&DWP(4,"edx")); +	&jnz	(&label("aligned")); +	&cmp	("ecx",0); +	&jne	(&label("little")); +	&ret	(); +&function_end_B("OPENSSL_cleanse"); + +&initseg("OPENSSL_cpuid_setup"); + +&asm_finish(); | 
