diff options
Diffstat (limited to 'vendor/github.com/templexxx/cpu/cpu_x86.go')
-rw-r--r-- | vendor/github.com/templexxx/cpu/cpu_x86.go | 425 |
1 files changed, 425 insertions, 0 deletions
diff --git a/vendor/github.com/templexxx/cpu/cpu_x86.go b/vendor/github.com/templexxx/cpu/cpu_x86.go new file mode 100644 index 0000000..313a29a --- /dev/null +++ b/vendor/github.com/templexxx/cpu/cpu_x86.go @@ -0,0 +1,425 @@ +// Copyright 2017 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// +build 386 amd64 amd64p32 + +package cpu + +import ( + "fmt" + "strings" +) + +const CacheLineSize = 64 + +// cpuid is implemented in cpu_x86.s. +func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32) + +// xgetbv with ecx = 0 is implemented in cpu_x86.s. +func xgetbv() (eax, edx uint32) + +const ( + // edx bits + cpuid_SSE2 = 1 << 26 + + // ecx bits + cpuid_SSE3 = 1 << 0 + cpuid_PCLMULQDQ = 1 << 1 + cpuid_SSSE3 = 1 << 9 + cpuid_FMA = 1 << 12 + cpuid_SSE41 = 1 << 19 + cpuid_SSE42 = 1 << 20 + cpuid_POPCNT = 1 << 23 + cpuid_AES = 1 << 25 + cpuid_OSXSAVE = 1 << 27 + cpuid_AVX = 1 << 28 + + // ebx bits + cpuid_BMI1 = 1 << 3 + cpuid_AVX2 = 1 << 5 + cpuid_BMI2 = 1 << 8 + cpuid_ERMS = 1 << 9 + cpuid_ADX = 1 << 19 + cpuid_AVX512F = 1 << 16 + cpuid_AVX512DQ = 1 << 17 + cpuid_AVX512BW = 1 << 30 + cpuid_AVX512VL = 1 << 31 + + // edx bits + cpuid_Invariant_TSC = 1 << 8 +) + +func doinit() { + options = []option{ + {"adx", &X86.HasADX}, + {"aes", &X86.HasAES}, + {"avx", &X86.HasAVX}, + {"avx2", &X86.HasAVX2}, + {"bmi1", &X86.HasBMI1}, + {"bmi2", &X86.HasBMI2}, + {"erms", &X86.HasERMS}, + {"fma", &X86.HasFMA}, + {"pclmulqdq", &X86.HasPCLMULQDQ}, + {"popcnt", &X86.HasPOPCNT}, + {"sse3", &X86.HasSSE3}, + {"sse41", &X86.HasSSE41}, + {"sse42", &X86.HasSSE42}, + {"ssse3", &X86.HasSSSE3}, + {"avx512f", &X86.HasAVX512F}, + {"avx512dq", &X86.HasAVX512DQ}, + {"avx512bw", &X86.HasAVX512BW}, + {"avx512vl", &X86.HasAVX512VL}, + {"invariant_tsc", &X86.HasInvariantTSC}, + + // sse2 set as last element so it can easily be removed again. See code below. + {"sse2", &X86.HasSSE2}, + } + + // Remove sse2 from options on amd64(p32) because SSE2 is a mandatory feature for these GOARCHs. + if GOARCH == "amd64" || GOARCH == "amd64p32" { + options = options[:len(options)-1] + } + + maxID, _, _, _ := cpuid(0, 0) + + if maxID < 1 { + return + } + + _, _, ecx1, edx1 := cpuid(1, 0) + X86.HasSSE2 = isSet(edx1, cpuid_SSE2) + + X86.HasSSE3 = isSet(ecx1, cpuid_SSE3) + X86.HasPCLMULQDQ = isSet(ecx1, cpuid_PCLMULQDQ) + X86.HasSSSE3 = isSet(ecx1, cpuid_SSSE3) + X86.HasFMA = isSet(ecx1, cpuid_FMA) + X86.HasSSE41 = isSet(ecx1, cpuid_SSE41) + X86.HasSSE42 = isSet(ecx1, cpuid_SSE42) + X86.HasPOPCNT = isSet(ecx1, cpuid_POPCNT) + X86.HasAES = isSet(ecx1, cpuid_AES) + X86.HasOSXSAVE = isSet(ecx1, cpuid_OSXSAVE) + + osSupportsAVX := false + osSupportsAVX512 := false + // For XGETBV, OSXSAVE bit is required and sufficient. + if X86.HasOSXSAVE { + eax, _ := xgetbv() + // Check if XMM and YMM registers have OS support. + osSupportsAVX = isSet(eax, 1<<1) && isSet(eax, 1<<2) + // Check is ZMM registers have OS support. + osSupportsAVX512 = isSet(eax>>5, 7) && isSet(eax>>1, 3) + } + + X86.HasAVX = isSet(ecx1, cpuid_AVX) && osSupportsAVX + + if maxID < 7 { + return + } + + _, ebx7, _, _ := cpuid(7, 0) + X86.HasBMI1 = isSet(ebx7, cpuid_BMI1) + X86.HasAVX2 = isSet(ebx7, cpuid_AVX2) && osSupportsAVX + X86.HasAVX512F = isSet(ebx7, cpuid_AVX512F) && osSupportsAVX512 + X86.HasAVX512DQ = isSet(ebx7, cpuid_AVX512DQ) && osSupportsAVX512 + X86.HasAVX512BW = isSet(ebx7, cpuid_AVX512BW) && osSupportsAVX512 + X86.HasAVX512VL = isSet(ebx7, cpuid_AVX512VL) && osSupportsAVX512 + X86.HasBMI2 = isSet(ebx7, cpuid_BMI2) + X86.HasERMS = isSet(ebx7, cpuid_ERMS) + X86.HasADX = isSet(ebx7, cpuid_ADX) + + X86.Cache = getCacheSize() + + X86.HasInvariantTSC = hasInvariantTSC() + + X86.Family, X86.Model = getFamilyModel() + + X86.Signature = makeSignature(X86.Family, X86.Model) + + X86.Name = getName() + + X86.TSCFrequency = getNativeTSCFrequency(X86.Name, X86.Signature) +} + +func isSet(hwc uint32, value uint32) bool { + return hwc&value != 0 +} + +func hasInvariantTSC() bool { + if maxExtendedFunction() < 0x80000007 { + return false + } + _, _, _, edx := cpuid(0x80000007, 0) + return isSet(edx, cpuid_Invariant_TSC) +} + +func getName() string { + if maxExtendedFunction() >= 0x80000004 { + v := make([]uint32, 0, 48) + for i := uint32(0); i < 3; i++ { + a, b, c, d := cpuid(0x80000002+i, 0) + v = append(v, a, b, c, d) + } + return strings.Trim(string(valAsString(v...)), " ") + } + return "unknown" +} + +// getNativeTSCFrequency gets TSC frequency from CPUID, +// only supports Intel (Skylake or later microarchitecture) & key information is from Intel manual & kernel codes +// (especially this commit: https://github.com/torvalds/linux/commit/604dc9170f2435d27da5039a3efd757dceadc684). +func getNativeTSCFrequency(name, sign string) uint64 { + + if vendorID() != Intel { + return 0 + } + + if maxFunctionID() < 0x15 { + return 0 + } + + // ApolloLake, GeminiLake, CannonLake (and presumably all new chipsets + // from this point) report the crystal frequency directly via CPUID.0x15. + // That's definitive data that we can rely upon. + eax, ebx, ecx, _ := cpuid(0x15, 0) + + // If ebx is 0, the TSC/”core crystal clock” ratio is not enumerated. + // We won't provide TSC frequency detection in this situation. + if eax == 0 || ebx == 0 { + return 0 + } + + // Skylake, Kabylake and all variants of those two chipsets report a + // crystal frequency of zero. + if ecx == 0 { // Crystal clock frequency is not enumerated. + ecx = getCrystalClockFrequency(sign) + } + + // TSC frequency = “core crystal clock frequency” * EBX/EAX. + return uint64(ecx) * (uint64(ebx) / uint64(eax)) +} + +// Copied from: CPUID Signature values of DisplayFamily and DisplayModel, +// in Intel® 64 and IA-32 Architectures Software Developer’s Manual +// Volume 4: Model-Specific Registers +// & https://github.com/torvalds/linux/blob/master/arch/x86/include/asm/intel-family.h +const ( + IntelFam6SkylakeL = "06_4EH" + IntelFam6Skylake = "06_5EH" + IntelFam6SkylakeX = "06_55H" + IntelFam6KabylakeL = "06_8EH" + IntelFam6Kabylake = "06_9EH" +) + +// getCrystalClockFrequency gets crystal clock frequency +// for Intel processors in which CPUID.15H.EBX[31:0] ÷ CPUID.0x15.EAX[31:0] is enumerated +// but CPUID.15H.ECX is not enumerated using this function to get nominal core crystal clock frequency. +// +// Actually these crystal clock frequencies provided by Intel hardcoded tables are not so accurate in some cases, +// e.g. SkyLake server CPU may have issue (All SKX subject the crystal to an EMI reduction circuit that +//reduces its actual frequency by (approximately) -0.25%): +// see https://lore.kernel.org/lkml/ff6dcea166e8ff8f2f6a03c17beab2cb436aa779.1513920414.git.len.brown@intel.com/ +// for more details. +// With this report, I set a coefficient (0.9975) for IntelFam6SkyLakeX. +// +// Unlike the kernel way (mentioned in https://github.com/torvalds/linux/commit/604dc9170f2435d27da5039a3efd757dceadc684), +// I prefer the Intel hardcoded tables, +// because after some testing (comparing with wall clock, see https://github.com/templexxx/tsc/tsc_test.go for more details), +// I found hardcoded tables are more accurate. +func getCrystalClockFrequency(sign string) uint32 { + + if maxFunctionID() < 0x16 { + return 0 + } + + switch sign { + case IntelFam6SkylakeL: + return 24 * 1000 * 1000 + case IntelFam6Skylake: + return 24 * 1000 * 1000 + case IntelFam6SkylakeX: + return 25 * 1000 * 1000 * 0.9975 + case IntelFam6KabylakeL: + return 24 * 1000 * 1000 + case IntelFam6Kabylake: + return 24 * 1000 * 1000 + } + + return 0 +} + +func getFamilyModel() (uint32, uint32) { + if maxFunctionID() < 0x1 { + return 0, 0 + } + eax, _, _, _ := cpuid(1, 0) + family := (eax >> 8) & 0xf + displayFamily := family + if family == 0xf { + displayFamily = ((eax >> 20) & 0xff) + family + } + model := (eax >> 4) & 0xf + displayModel := model + if family == 0x6 || family == 0xf { + displayModel = ((eax >> 12) & 0xf0) + model + } + return displayFamily, displayModel +} + +// signature format: XX_XXH +func makeSignature(family, model uint32) string { + signature := strings.ToUpper(fmt.Sprintf("0%x_0%xH", family, model)) + ss := strings.Split(signature, "_") + for i, s := range ss { + // Maybe insert too more `0`, drop it. + if len(s) > 2 { + s = s[1:] + ss[i] = s + } + } + return strings.Join(ss, "_") +} + +// getCacheSize is from +// https://github.com/klauspost/cpuid/blob/5a626f7029c910cc8329dae5405ee4f65034bce5/cpuid.go#L723 +func getCacheSize() Cache { + c := Cache{ + L1I: -1, + L1D: -1, + L2: -1, + L3: -1, + } + + vendor := vendorID() + switch vendor { + case Intel: + if maxFunctionID() < 4 { + return c + } + for i := uint32(0); ; i++ { + eax, ebx, ecx, _ := cpuid(4, i) + cacheType := eax & 15 + if cacheType == 0 { + break + } + cacheLevel := (eax >> 5) & 7 + coherency := int(ebx&0xfff) + 1 + partitions := int((ebx>>12)&0x3ff) + 1 + associativity := int((ebx>>22)&0x3ff) + 1 + sets := int(ecx) + 1 + size := associativity * partitions * coherency * sets + switch cacheLevel { + case 1: + if cacheType == 1 { + // 1 = Data Cache + c.L1D = size + } else if cacheType == 2 { + // 2 = Instruction Cache + c.L1I = size + } else { + if c.L1D < 0 { + c.L1I = size + } + if c.L1I < 0 { + c.L1I = size + } + } + case 2: + c.L2 = size + case 3: + c.L3 = size + } + } + case AMD, Hygon: + // Untested. + if maxExtendedFunction() < 0x80000005 { + return c + } + _, _, ecx, edx := cpuid(0x80000005, 0) + c.L1D = int(((ecx >> 24) & 0xFF) * 1024) + c.L1I = int(((edx >> 24) & 0xFF) * 1024) + + if maxExtendedFunction() < 0x80000006 { + return c + } + _, _, ecx, _ = cpuid(0x80000006, 0) + c.L2 = int(((ecx >> 16) & 0xFFFF) * 1024) + } + + return c +} + +func maxFunctionID() uint32 { + a, _, _, _ := cpuid(0, 0) + return a +} + +func maxExtendedFunction() uint32 { + eax, _, _, _ := cpuid(0x80000000, 0) + return eax +} + +const ( + Other = iota + Intel + AMD + VIA + Transmeta + NSC + KVM // Kernel-based Virtual Machine + MSVM // Microsoft Hyper-V or Windows Virtual PC + VMware + XenHVM + Bhyve + Hygon +) + +// Except from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID +var vendorMapping = map[string]int{ + "AMDisbetter!": AMD, + "AuthenticAMD": AMD, + "CentaurHauls": VIA, + "GenuineIntel": Intel, + "TransmetaCPU": Transmeta, + "GenuineTMx86": Transmeta, + "Geode by NSC": NSC, + "VIA VIA VIA ": VIA, + "KVMKVMKVMKVM": KVM, + "Microsoft Hv": MSVM, + "VMwareVMware": VMware, + "XenVMMXenVMM": XenHVM, + "bhyve bhyve ": Bhyve, + "HygonGenuine": Hygon, +} + +func vendorID() int { + _, b, c, d := cpuid(0, 0) + v := valAsString(b, d, c) + vend, ok := vendorMapping[string(v)] + if !ok { + return Other + } + return vend +} + +func valAsString(values ...uint32) []byte { + r := make([]byte, 4*len(values)) + for i, v := range values { + dst := r[i*4:] + dst[0] = byte(v & 0xff) + dst[1] = byte((v >> 8) & 0xff) + dst[2] = byte((v >> 16) & 0xff) + dst[3] = byte((v >> 24) & 0xff) + switch { + case dst[0] == 0: + return r[:i*4] + case dst[1] == 0: + return r[:i*4+1] + case dst[2] == 0: + return r[:i*4+2] + case dst[3] == 0: + return r[:i*4+3] + } + } + return r +} |