summaryrefslogtreecommitdiff
path: root/vendor/github.com/templexxx/cpu/cpu.go
blob: 92295d92269f491b7a41810c0577374481c308cf (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Package cpu implements processor feature detection
// used by the Go standard library.
package cpu

// debugOptions is documented as being set to true by the runtime if go was
// compiled with GOEXPERIMENT=debugcpu and GOOS is Linux or Darwin, via a
// linkname in runtime/proc.go.
//
// NOTE(review): this file lives under vendor/github.com/templexxx/cpu, and
// nothing in the visible part of the file reads or writes this variable —
// confirm whether the runtime linkname still applies to this copy.
var debugOptions bool

// X86 holds the x86 feature flags and CPU information described by the x86
// struct.
// NOTE(review): presumably filled in by the arch-specific doinit called from
// init — confirm against the x86 implementation file.
var X86 x86

// X86FalseSharingRange is the size, in bytes, of the padding placed at each
// end of the x86 struct to avoid false sharing.
//
// It is 128 rather than 64 (one cache line) because of the second-level cache
// streamer:
//
// "Loads data or instructions from memory to the second-level cache.
// To use the streamer, organize the data or instructions in blocks of 128 bytes,
// aligned on 128 bytes."
// From <Intel® 64 and IA-32 architectures optimization reference manual>,
// in section 3.7.3 "Hardware Prefetching for Second-Level Cache"
//
// In practice, using 128 bytes has been found to give better performance than
// 64 bytes (one cache line).
const X86FalseSharingRange = 128

// x86 describes the detected x86 processor.
//
// The booleans contain the correspondingly named cpuid feature bit.
// HasAVX and HasAVX2 are only set if the OS supports XMM and YMM registers
// in addition to the cpuid feature bit being set.
//
// The struct is padded with X86FalseSharingRange bytes at both ends to avoid
// false sharing.
type x86 struct {
	_            [X86FalseSharingRange]byte
	HasAES       bool
	HasADX       bool
	HasAVX       bool
	HasAVX2      bool
	HasAVX512F   bool
	HasAVX512DQ  bool
	HasAVX512BW  bool
	HasAVX512VL  bool
	HasBMI1      bool
	HasBMI2      bool
	HasERMS      bool
	HasFMA       bool
	HasOSXSAVE   bool
	HasPCLMULQDQ bool
	HasPOPCNT    bool
	HasSSE2      bool
	HasSSE3      bool
	HasSSSE3     bool
	HasSSE41     bool
	HasSSE42     bool
	// HasInvariantTSC reports invariant TSC support.
	// The invariant TSC will run at a constant rate in all ACPI P-, C-, and T-states.
	// This is the architectural behavior moving forward. On processors with
	// invariant TSC support, the OS may use the TSC for wall clock timer services (instead of ACPI or HPET timers).
	HasInvariantTSC bool

	// Cache holds the detected cache sizes; see the Cache type.
	Cache Cache

	// TSCFrequency is only meaningful when HasInvariantTSC == true.
	// Unit: Hz.
	//
	// Warning:
	// 1. If it is 0, the frequency could not be determined. Don't use it.
	// 2. Don't use it if you need a "100%" precise timestamp.
	TSCFrequency uint64

	// NOTE(review): Name is presumably the processor brand string — confirm
	// against the detection code, which is not in the visible part of this file.
	Name      string
	Signature string // DisplayFamily_DisplayModel.
	Family    uint32 // CPU family number.
	Model     uint32 // CPU model number.

	_ [X86FalseSharingRange]byte
}

// Cache holds the detected CPU cache sizes, one field per cache
// (L1 instruction, L1 data, L2 and L3).
// A field is -1 if that size is undetected.
// NOTE(review): the unit is not stated in this file (presumably bytes) — confirm.
type Cache struct {
	L1I int
	L1D int
	L2  int
	L3  int
}

// PPC64 holds the ppc64 feature flags described by the ppc64 struct.
// NOTE(review): presumably filled in by the arch-specific doinit called from
// init — confirm against the ppc64 implementation file.
var PPC64 ppc64

// ppc64 holds the ppc64x processor feature flags.
//
// For ppc64x, it is safe to check only for ISA level starting on ISA v3.00,
// since there are no optional categories. There are some exceptions that also
// require kernel support to work (darn, scv), so there are feature bits for
// those as well. The minimum processor requirement is POWER8 (ISA 2.07), so we
// maintain some of the old feature checks for optional categories for
// safety.
//
// The struct is padded with CacheLineSize bytes at both ends to avoid false
// sharing. CacheLineSize is not defined in the visible part of this file.
type ppc64 struct {
	_          [CacheLineSize]byte
	HasVMX     bool // Vector unit (Altivec)
	HasDFP     bool // Decimal Floating Point unit
	HasVSX     bool // Vector-scalar unit
	HasHTM     bool // Hardware Transactional Memory
	HasISEL    bool // Integer select
	HasVCRYPTO bool // Vector cryptography
	HasHTMNOSC bool // HTM: kernel-aborted transaction in syscalls
	HasDARN    bool // Hardware random number generator (requires kernel enablement)
	HasSCV     bool // Syscall vectored (requires kernel enablement)
	IsPOWER8   bool // ISA v2.07 (POWER8)
	IsPOWER9   bool // ISA v3.00 (POWER9)
	_          [CacheLineSize]byte
}

// ARM64 holds the arm64 feature flags described by the arm64 struct.
// NOTE(review): presumably filled in by the arch-specific doinit called from
// init — confirm against the arm64 implementation file.
var ARM64 arm64

// arm64 holds the arm64 processor feature flags.
//
// The booleans contain the correspondingly named cpu feature bit.
//
// The struct is padded with CacheLineSize bytes at both ends to avoid false
// sharing. CacheLineSize is not defined in the visible part of this file.
type arm64 struct {
	_           [CacheLineSize]byte
	HasFP       bool
	HasASIMD    bool
	HasEVTSTRM  bool
	HasAES      bool
	HasPMULL    bool
	HasSHA1     bool
	HasSHA2     bool
	HasCRC32    bool
	HasATOMICS  bool
	HasFPHP     bool
	HasASIMDHP  bool
	HasCPUID    bool
	HasASIMDRDM bool
	HasJSCVT    bool
	HasFCMA     bool
	HasLRCPC    bool
	HasDCPOP    bool
	HasSHA3     bool
	HasSM3      bool
	HasSM4      bool
	HasASIMDDP  bool
	HasSHA512   bool
	HasSVE      bool
	HasASIMDFHM bool
	_           [CacheLineSize]byte
}

// S390X holds the s390x feature flags described by the s390x struct.
// NOTE(review): presumably filled in by the arch-specific doinit called from
// init — confirm against the s390x implementation file; the HasVX field
// comment says the runtime sets that flag while processing auxv records.
var S390X s390x

// s390x holds the s390x processor feature flags. Fields marked [mandatory]
// in their comments are listed as mandatory facilities.
//
// The struct carries CacheLineSize bytes of padding at both ends, the same
// layout the other feature structs in this file use to avoid false sharing.
// CacheLineSize is not defined in the visible part of this file.
type s390x struct {
	_               [CacheLineSize]byte
	HasZArch        bool // z architecture mode is active [mandatory]
	HasSTFLE        bool // store facility list extended [mandatory]
	HasLDisp        bool // long (20-bit) displacements [mandatory]
	HasEImm         bool // 32-bit immediates [mandatory]
	HasDFP          bool // decimal floating point
	HasETF3Enhanced bool // ETF-3 enhanced
	HasMSA          bool // message security assist (CPACF)
	HasAES          bool // KM-AES{128,192,256} functions
	HasAESCBC       bool // KMC-AES{128,192,256} functions
	HasAESCTR       bool // KMCTR-AES{128,192,256} functions
	HasAESGCM       bool // KMA-GCM-AES{128,192,256} functions
	HasGHASH        bool // KIMD-GHASH function
	HasSHA1         bool // K{I,L}MD-SHA-1 functions
	HasSHA256       bool // K{I,L}MD-SHA-256 functions
	HasSHA512       bool // K{I,L}MD-SHA-512 functions
	HasVX           bool // vector facility. Note: the runtime sets this when it processes auxv records.
	_               [CacheLineSize]byte
}

// init examines the processor and sets the relevant variables above when the
// package is initialized.
//
// It calls doinit (not defined in the visible part of this file; presumably
// the arch-specific detector — confirm) and then processOptions with an empty
// string. processOptions only loops while its argument is non-empty, so that
// second call disables no features.
//
// NOTE(review): this comment previously described an env string supplied by
// the runtime on Linux and Darwin when go was compiled with
// GOEXPERIMENT=debugcpu. No such string is passed here — confirm that feature
// overrides are intentionally inactive in this copy.
func init() {
	doinit()
	processOptions("")
}

// options contains the cpu debug options that can be used in GODEBUGCPU;
// processOptions looks features up in this slice by name.
// Options are arch dependent and are added by the arch-specific doinit functions.
// Features that are mandatory for the specific GOARCH should not be added to options
// (e.g. SSE2 on amd64).
var options []option

// option pairs a debug option name with the feature flag it controls.
//
// Name should be lower case, e.g. avx instead of AVX; processOptions matches
// it against the requested key with an exact string comparison.
// Feature points at the boolean that processOptions sets to false when the
// option is disabled.
type option struct {
	Name    string
	Feature *bool
}

// processOptions switches off CPU features named in env.
//
// env is a comma-separated list of key=value fields, for example
// "feature1=0,feature2=0", where each key is one of the architecture-specific
// names stored in the package's options variable. For every field whose value
// is exactly "0":
//   - the key "all" clears every feature referenced through options and stops
//     processing the remainder of env;
//   - any other key clears the first entry in options whose Name equals it.
//
// Fields without an '=', unknown keys, and values other than "0" are silently
// ignored, so features can only be turned off, never on.
func processOptions(env string) {
	for len(env) > 0 {
		// Peel the next comma-separated entry off the front of env.
		var entry string
		if comma := indexByte(env, ','); comma >= 0 {
			entry, env = env[:comma], env[comma+1:]
		} else {
			entry, env = env, ""
		}

		eq := indexByte(entry, '=')
		if eq < 0 {
			continue
		}
		name, setting := entry[:eq], entry[eq+1:]

		// Only allow turning off CPU features by specifying '0'.
		if setting != "0" {
			continue
		}

		if name == "all" {
			for idx := range options {
				*options[idx].Feature = false
			}
			return
		}

		// Only the first option carrying this name is cleared.
		for idx := range options {
			if options[idx].Name == name {
				*options[idx].Feature = false
				break
			}
		}
	}
}

// indexByte reports the position of the first byte in s equal to c.
// It returns -1 when s contains no such byte (including when s is empty).
func indexByte(s string, c byte) int {
	pos, n := 0, len(s)
	// Advance until a match is found or the string is exhausted.
	for pos < n && s[pos] != c {
		pos++
	}
	if pos == n {
		return -1
	}
	return pos
}