cpuid.go 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071
  1. // Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file.
  2. // Package cpuid provides information about the CPU running the current program.
  3. //
  4. // CPU features are detected on startup, and kept for fast access through the life of the application.
  5. // Currently x86 / x64 (AMD64) is supported.
  6. //
  7. // You can access the CPU information by accessing the shared CPU variable of the cpuid library.
  8. //
  9. // Package home: https://github.com/klauspost/cpuid
  10. package cpuid
  11. import (
  12. "strings"
  13. )
// Vendor is a representation of a CPU vendor.
type Vendor int

// Known CPU and hypervisor vendors.
// Other is the zero value; vendorID returns it for any vendor string that is
// not listed in vendorMapping.
const (
	Other     Vendor = iota // Vendor string not present in vendorMapping
	Intel                   // "GenuineIntel"
	AMD                     // "AuthenticAMD" or "AMDisbetter!"
	VIA                     // "CentaurHauls" or "VIA VIA VIA "
	Transmeta               // "TransmetaCPU" or "GenuineTMx86"
	NSC                     // "Geode by NSC" (National Semiconductor)
	KVM                     // Kernel-based Virtual Machine
	MSVM                    // Microsoft Hyper-V or Windows Virtual PC
	VMware                  // "VMwareVMware"
	XenHVM                  // "XenVMMXenVMM"
)
// CPU feature flags. Each constant is a distinct single bit (1 << iota), so
// they can be OR-ed together into a Flags value and tested with a bitwise AND.
// The constants are untyped; they are used both with Flags values and with the
// uint64 accumulator inside support.
const (
	CMOV        = 1 << iota // i686 CMOV
	NX                      // NX (No-Execute) bit
	AMD3DNOW                // AMD 3DNOW
	AMD3DNOWEXT             // AMD 3DNowExt
	MMX                     // standard MMX
	MMXEXT                  // SSE integer functions or AMD MMX ext
	SSE                     // SSE functions
	SSE2                    // P4 SSE2 functions
	SSE3                    // Prescott SSE3 functions
	SSSE3                   // Conroe SSSE3 functions
	SSE4                    // Penryn SSE4.1 functions
	SSE4A                   // AMD Barcelona microarchitecture SSE4a instructions
	SSE42                   // Nehalem SSE4.2 functions
	AVX                     // AVX functions
	AVX2                    // AVX2 functions
	FMA3                    // Intel FMA 3
	FMA4                    // Bulldozer FMA4 functions
	XOP                     // Bulldozer XOP functions
	F16C                    // Half-precision floating-point conversion
	BMI1                    // Bit Manipulation Instruction Set 1
	BMI2                    // Bit Manipulation Instruction Set 2
	TBM                     // AMD Trailing Bit Manipulation
	LZCNT                   // LZCNT instruction
	POPCNT                  // POPCNT instruction
	AESNI                   // Advanced Encryption Standard New Instructions
	CLMUL                   // Carry-less Multiplication
	HTT                     // Hyperthreading (enabled)
	HLE                     // Hardware Lock Elision
	RTM                     // Restricted Transactional Memory
	RDRAND                  // RDRAND instruction is available
	RDSEED                  // RDSEED instruction is available
	ADX                     // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	SHA                     // Intel SHA Extensions
	AVX512F                 // AVX-512 Foundation
	AVX512DQ                // AVX-512 Doubleword and Quadword Instructions
	AVX512IFMA              // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF                // AVX-512 Prefetch Instructions
	AVX512ER                // AVX-512 Exponential and Reciprocal Instructions
	AVX512CD                // AVX-512 Conflict Detection Instructions
	AVX512BW                // AVX-512 Byte and Word Instructions
	AVX512VL                // AVX-512 Vector Length Extensions
	AVX512VBMI              // AVX-512 Vector Bit Manipulation Instructions
	MPX                     // Intel MPX (Memory Protection Extensions)
	ERMS                    // Enhanced REP MOVSB/STOSB
	RDTSCP                  // RDTSCP Instruction
	CX16                    // CMPXCHG16B Instruction
	SGX                     // Software Guard Extensions

	// Performance indicators
	SSE2SLOW // SSE2 is supported, but usually not faster
	SSE3SLOW // SSE3 is supported, but usually not faster
	ATOM     // Atom processor, some SSSE3 instructions are slower
)
// flagNames maps each single-bit feature flag to the name used when a Flags
// value is rendered as text. Note that SSE4 is rendered as "SSE4.1" and SSE42
// as "SSE4.2"; every other flag uses its constant name.
var flagNames = map[Flags]string{
	CMOV:        "CMOV",        // i686 CMOV
	NX:          "NX",          // NX (No-Execute) bit
	AMD3DNOW:    "AMD3DNOW",    // AMD 3DNOW
	AMD3DNOWEXT: "AMD3DNOWEXT", // AMD 3DNowExt
	MMX:         "MMX",         // Standard MMX
	MMXEXT:      "MMXEXT",      // SSE integer functions or AMD MMX ext
	SSE:         "SSE",         // SSE functions
	SSE2:        "SSE2",        // P4 SSE2 functions
	SSE3:        "SSE3",        // Prescott SSE3 functions
	SSSE3:       "SSSE3",       // Conroe SSSE3 functions
	SSE4:        "SSE4.1",      // Penryn SSE4.1 functions
	SSE4A:       "SSE4A",       // AMD Barcelona microarchitecture SSE4a instructions
	SSE42:       "SSE4.2",      // Nehalem SSE4.2 functions
	AVX:         "AVX",         // AVX functions
	AVX2:        "AVX2",        // AVX2 functions
	FMA3:        "FMA3",        // Intel FMA 3
	FMA4:        "FMA4",        // Bulldozer FMA4 functions
	XOP:         "XOP",         // Bulldozer XOP functions
	F16C:        "F16C",        // Half-precision floating-point conversion
	BMI1:        "BMI1",        // Bit Manipulation Instruction Set 1
	BMI2:        "BMI2",        // Bit Manipulation Instruction Set 2
	TBM:         "TBM",         // AMD Trailing Bit Manipulation
	LZCNT:       "LZCNT",       // LZCNT instruction
	POPCNT:      "POPCNT",      // POPCNT instruction
	AESNI:       "AESNI",       // Advanced Encryption Standard New Instructions
	CLMUL:       "CLMUL",       // Carry-less Multiplication
	HTT:         "HTT",         // Hyperthreading (enabled)
	HLE:         "HLE",         // Hardware Lock Elision
	RTM:         "RTM",         // Restricted Transactional Memory
	RDRAND:      "RDRAND",      // RDRAND instruction is available
	RDSEED:      "RDSEED",      // RDSEED instruction is available
	ADX:         "ADX",         // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
	SHA:         "SHA",         // Intel SHA Extensions
	AVX512F:     "AVX512F",     // AVX-512 Foundation
	AVX512DQ:    "AVX512DQ",    // AVX-512 Doubleword and Quadword Instructions
	AVX512IFMA:  "AVX512IFMA",  // AVX-512 Integer Fused Multiply-Add Instructions
	AVX512PF:    "AVX512PF",    // AVX-512 Prefetch Instructions
	AVX512ER:    "AVX512ER",    // AVX-512 Exponential and Reciprocal Instructions
	AVX512CD:    "AVX512CD",    // AVX-512 Conflict Detection Instructions
	AVX512BW:    "AVX512BW",    // AVX-512 Byte and Word Instructions
	AVX512VL:    "AVX512VL",    // AVX-512 Vector Length Extensions
	AVX512VBMI:  "AVX512VBMI",  // AVX-512 Vector Bit Manipulation Instructions
	MPX:         "MPX",         // Intel MPX (Memory Protection Extensions)
	ERMS:        "ERMS",        // Enhanced REP MOVSB/STOSB
	RDTSCP:      "RDTSCP",      // RDTSCP Instruction
	CX16:        "CX16",        // CMPXCHG16B Instruction
	SGX:         "SGX",         // Software Guard Extensions

	// Performance indicators
	SSE2SLOW: "SSE2SLOW", // SSE2 supported, but usually not faster
	SSE3SLOW: "SSE3SLOW", // SSE3 supported, but usually not faster
	ATOM:     "ATOM",     // Atom processor, some SSSE3 instructions are slower
}
// CPUInfo contains information about the detected system CPU.
// All exported fields are filled in by Detect.
type CPUInfo struct {
	BrandName      string // Brand name reported by the CPU
	VendorID       Vendor // Comparable CPU vendor ID
	HypervisorName string // Hypervisor vendor: a short name from hvmap when recognized, otherwise the raw CPUID string; empty when no hypervisor is reported
	Features       Flags  // Features of the CPU
	PhysicalCores  int    // Number of physical processor cores in your CPU. Will be 0 if undetectable.
	ThreadsPerCore int    // Number of threads per physical core. Will be 1 if undetectable.
	LogicalCores   int    // Number of physical cores times threads that can run on each core through the use of hyperthreading. Will be 0 if undetectable.
	Family         int    // CPU family number
	Model          int    // CPU model number
	CacheLine      int    // Cache line size in bytes. Will be 0 if undetectable.
	Cache          struct {
		L1I int // L1 Instruction Cache (per core or shared). Will be -1 if undetected
		L1D int // L1 Data Cache (per core or shared). Will be -1 if undetected
		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
		L3  int // L3 Cache (per core or shared). Will be -1 if undetected
	}
	SGX       SGXSupport // SGX details as returned by sgx during Detect
	maxFunc   uint32     // Value of maxFunctionID() at the last Detect
	maxExFunc uint32     // Value of maxExtendedFunction() at the last Detect
}
// Low-level query hooks used by every detection routine in this file.
// They are declared as function variables and are not assigned here, so they
// are nil until something installs an implementation — presumably initCPU,
// which init calls before Detect; confirm against the platform-specific files.

// cpuid takes a function (leaf) number and returns four 32-bit register values.
var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)

// cpuidex is like cpuid but also takes a second selector (op2); this file uses
// it for leaves 4, 7, 0xb and 0x12.
var cpuidex func(op, op2 uint32) (eax, ebx, ecx, edx uint32)

// xgetbv returns two 32-bit values for the given index; support calls it with
// index 0 to check which register states the OS has enabled.
var xgetbv func(index uint32) (eax, edx uint32)

// rdtscpAsm returns four 32-bit values; RTCounter combines eax/edx into the
// 64-bit counter and Ia32TscAux returns ecx.
var rdtscpAsm func() (eax, ebx, ecx, edx uint32)
// CPU contains information about the CPU as detected on startup,
// or when Detect last was called.
//
// Use this as the primary entry point to your data; the values are detected
// once and then read directly from this variable.
var CPU CPUInfo
// init runs at package load. It calls initCPU and then Detect, so the exported
// CPU variable is populated before user code can read it.
// initCPU is not defined in this part of the file; presumably it installs the
// cpuid/cpuidex/xgetbv/rdtscpAsm implementations that Detect relies on, which
// is why it must run first — confirm against the platform-specific files.
func init() {
	initCPU()
	Detect()
}
  170. // Detect will re-detect current CPU info.
  171. // This will replace the content of the exported CPU variable.
  172. //
  173. // Unless you expect the CPU to change while you are running your program
  174. // you should not need to call this function.
  175. // If you call this, you must ensure that no other goroutine is accessing the
  176. // exported CPU variable.
  177. func Detect() {
  178. CPU.maxFunc = maxFunctionID()
  179. CPU.maxExFunc = maxExtendedFunction()
  180. CPU.BrandName = brandName()
  181. CPU.CacheLine = cacheLine()
  182. CPU.Family, CPU.Model = familyModel()
  183. CPU.Features = support()
  184. CPU.SGX = sgx(CPU.Features&SGX != 0)
  185. CPU.ThreadsPerCore = threadsPerCore()
  186. CPU.LogicalCores = logicalCores()
  187. CPU.PhysicalCores = physicalCores()
  188. CPU.VendorID = vendorID()
  189. CPU.HypervisorName = hypervisorVendorName()
  190. CPU.cacheSize()
  191. }
  192. // Generated here: http://play.golang.org/p/BxFH2Gdc0G
  193. // Cmov indicates support of CMOV instructions
  194. func (c CPUInfo) Cmov() bool {
  195. return c.Features&CMOV != 0
  196. }
  197. // Amd3dnow indicates support of AMD 3DNOW! instructions
  198. func (c CPUInfo) Amd3dnow() bool {
  199. return c.Features&AMD3DNOW != 0
  200. }
  201. // Amd3dnowExt indicates support of AMD 3DNOW! Extended instructions
  202. func (c CPUInfo) Amd3dnowExt() bool {
  203. return c.Features&AMD3DNOWEXT != 0
  204. }
  205. // MMX indicates support of MMX instructions
  206. func (c CPUInfo) MMX() bool {
  207. return c.Features&MMX != 0
  208. }
  209. // MMXExt indicates support of MMXEXT instructions
  210. // (SSE integer functions or AMD MMX ext)
  211. func (c CPUInfo) MMXExt() bool {
  212. return c.Features&MMXEXT != 0
  213. }
  214. // SSE indicates support of SSE instructions
  215. func (c CPUInfo) SSE() bool {
  216. return c.Features&SSE != 0
  217. }
  218. // SSE2 indicates support of SSE 2 instructions
  219. func (c CPUInfo) SSE2() bool {
  220. return c.Features&SSE2 != 0
  221. }
  222. // SSE3 indicates support of SSE 3 instructions
  223. func (c CPUInfo) SSE3() bool {
  224. return c.Features&SSE3 != 0
  225. }
  226. // SSSE3 indicates support of SSSE 3 instructions
  227. func (c CPUInfo) SSSE3() bool {
  228. return c.Features&SSSE3 != 0
  229. }
  230. // SSE4 indicates support of SSE 4 (also called SSE 4.1) instructions
  231. func (c CPUInfo) SSE4() bool {
  232. return c.Features&SSE4 != 0
  233. }
  234. // SSE42 indicates support of SSE4.2 instructions
  235. func (c CPUInfo) SSE42() bool {
  236. return c.Features&SSE42 != 0
  237. }
  238. // AVX indicates support of AVX instructions
  239. // and operating system support of AVX instructions
  240. func (c CPUInfo) AVX() bool {
  241. return c.Features&AVX != 0
  242. }
  243. // AVX2 indicates support of AVX2 instructions
  244. func (c CPUInfo) AVX2() bool {
  245. return c.Features&AVX2 != 0
  246. }
  247. // FMA3 indicates support of FMA3 instructions
  248. func (c CPUInfo) FMA3() bool {
  249. return c.Features&FMA3 != 0
  250. }
  251. // FMA4 indicates support of FMA4 instructions
  252. func (c CPUInfo) FMA4() bool {
  253. return c.Features&FMA4 != 0
  254. }
  255. // XOP indicates support of XOP instructions
  256. func (c CPUInfo) XOP() bool {
  257. return c.Features&XOP != 0
  258. }
  259. // F16C indicates support of F16C instructions
  260. func (c CPUInfo) F16C() bool {
  261. return c.Features&F16C != 0
  262. }
  263. // BMI1 indicates support of BMI1 instructions
  264. func (c CPUInfo) BMI1() bool {
  265. return c.Features&BMI1 != 0
  266. }
  267. // BMI2 indicates support of BMI2 instructions
  268. func (c CPUInfo) BMI2() bool {
  269. return c.Features&BMI2 != 0
  270. }
  271. // TBM indicates support of TBM instructions
  272. // (AMD Trailing Bit Manipulation)
  273. func (c CPUInfo) TBM() bool {
  274. return c.Features&TBM != 0
  275. }
  276. // Lzcnt indicates support of LZCNT instruction
  277. func (c CPUInfo) Lzcnt() bool {
  278. return c.Features&LZCNT != 0
  279. }
  280. // Popcnt indicates support of POPCNT instruction
  281. func (c CPUInfo) Popcnt() bool {
  282. return c.Features&POPCNT != 0
  283. }
  284. // HTT indicates the processor has Hyperthreading enabled
  285. func (c CPUInfo) HTT() bool {
  286. return c.Features&HTT != 0
  287. }
  288. // SSE2Slow indicates that SSE2 may be slow on this processor
  289. func (c CPUInfo) SSE2Slow() bool {
  290. return c.Features&SSE2SLOW != 0
  291. }
  292. // SSE3Slow indicates that SSE3 may be slow on this processor
  293. func (c CPUInfo) SSE3Slow() bool {
  294. return c.Features&SSE3SLOW != 0
  295. }
  296. // AesNi indicates support of AES-NI instructions
  297. // (Advanced Encryption Standard New Instructions)
  298. func (c CPUInfo) AesNi() bool {
  299. return c.Features&AESNI != 0
  300. }
  301. // Clmul indicates support of CLMUL instructions
  302. // (Carry-less Multiplication)
  303. func (c CPUInfo) Clmul() bool {
  304. return c.Features&CLMUL != 0
  305. }
  306. // NX indicates support of NX (No-Execute) bit
  307. func (c CPUInfo) NX() bool {
  308. return c.Features&NX != 0
  309. }
  310. // SSE4A indicates support of AMD Barcelona microarchitecture SSE4a instructions
  311. func (c CPUInfo) SSE4A() bool {
  312. return c.Features&SSE4A != 0
  313. }
  314. // HLE indicates support of Hardware Lock Elision
  315. func (c CPUInfo) HLE() bool {
  316. return c.Features&HLE != 0
  317. }
  318. // RTM indicates support of Restricted Transactional Memory
  319. func (c CPUInfo) RTM() bool {
  320. return c.Features&RTM != 0
  321. }
  322. // Rdrand indicates support of RDRAND instruction is available
  323. func (c CPUInfo) Rdrand() bool {
  324. return c.Features&RDRAND != 0
  325. }
  326. // Rdseed indicates support of RDSEED instruction is available
  327. func (c CPUInfo) Rdseed() bool {
  328. return c.Features&RDSEED != 0
  329. }
  330. // ADX indicates support of Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
  331. func (c CPUInfo) ADX() bool {
  332. return c.Features&ADX != 0
  333. }
  334. // SHA indicates support of Intel SHA Extensions
  335. func (c CPUInfo) SHA() bool {
  336. return c.Features&SHA != 0
  337. }
  338. // AVX512F indicates support of AVX-512 Foundation
  339. func (c CPUInfo) AVX512F() bool {
  340. return c.Features&AVX512F != 0
  341. }
  342. // AVX512DQ indicates support of AVX-512 Doubleword and Quadword Instructions
  343. func (c CPUInfo) AVX512DQ() bool {
  344. return c.Features&AVX512DQ != 0
  345. }
  346. // AVX512IFMA indicates support of AVX-512 Integer Fused Multiply-Add Instructions
  347. func (c CPUInfo) AVX512IFMA() bool {
  348. return c.Features&AVX512IFMA != 0
  349. }
  350. // AVX512PF indicates support of AVX-512 Prefetch Instructions
  351. func (c CPUInfo) AVX512PF() bool {
  352. return c.Features&AVX512PF != 0
  353. }
  354. // AVX512ER indicates support of AVX-512 Exponential and Reciprocal Instructions
  355. func (c CPUInfo) AVX512ER() bool {
  356. return c.Features&AVX512ER != 0
  357. }
  358. // AVX512CD indicates support of AVX-512 Conflict Detection Instructions
  359. func (c CPUInfo) AVX512CD() bool {
  360. return c.Features&AVX512CD != 0
  361. }
  362. // AVX512BW indicates support of AVX-512 Byte and Word Instructions
  363. func (c CPUInfo) AVX512BW() bool {
  364. return c.Features&AVX512BW != 0
  365. }
  366. // AVX512VL indicates support of AVX-512 Vector Length Extensions
  367. func (c CPUInfo) AVX512VL() bool {
  368. return c.Features&AVX512VL != 0
  369. }
  370. // AVX512VBMI indicates support of AVX-512 Vector Bit Manipulation Instructions
  371. func (c CPUInfo) AVX512VBMI() bool {
  372. return c.Features&AVX512VBMI != 0
  373. }
  374. // MPX indicates support of Intel MPX (Memory Protection Extensions)
  375. func (c CPUInfo) MPX() bool {
  376. return c.Features&MPX != 0
  377. }
  378. // ERMS indicates support of Enhanced REP MOVSB/STOSB
  379. func (c CPUInfo) ERMS() bool {
  380. return c.Features&ERMS != 0
  381. }
  382. func (c CPUInfo) RDTSCP() bool {
  383. return c.Features&RDTSCP != 0
  384. }
  385. func (c CPUInfo) CX16() bool {
  386. return c.Features&CX16 != 0
  387. }
  388. // Atom indicates an Atom processor
  389. func (c CPUInfo) Atom() bool {
  390. return c.Features&ATOM != 0
  391. }
  392. // Intel returns true if vendor is recognized as Intel
  393. func (c CPUInfo) Intel() bool {
  394. return c.VendorID == Intel
  395. }
  396. // AMD returns true if vendor is recognized as AMD
  397. func (c CPUInfo) AMD() bool {
  398. return c.VendorID == AMD
  399. }
  400. // Transmeta returns true if vendor is recognized as Transmeta
  401. func (c CPUInfo) Transmeta() bool {
  402. return c.VendorID == Transmeta
  403. }
  404. // NSC returns true if vendor is recognized as National Semiconductor
  405. func (c CPUInfo) NSC() bool {
  406. return c.VendorID == NSC
  407. }
  408. // VIA returns true if vendor is recognized as VIA
  409. func (c CPUInfo) VIA() bool {
  410. return c.VendorID == VIA
  411. }
  412. // RTCounter returns the 64-bit time-stamp counter
  413. // Uses the RDTSCP instruction. The value 0 is returned
  414. // if the CPU does not support the instruction.
  415. func (c CPUInfo) RTCounter() uint64 {
  416. if !c.RDTSCP() {
  417. return 0
  418. }
  419. a, _, _, d := rdtscpAsm()
  420. return uint64(a) | (uint64(d) << 32)
  421. }
  422. // Ia32TscAux returns the IA32_TSC_AUX part of the RDTSCP.
  423. // This variable is OS dependent, but on Linux contains information
  424. // about the current cpu/core the code is running on.
  425. // If the RDTSCP instruction isn't supported on the CPU, the value 0 is returned.
  426. func (c CPUInfo) Ia32TscAux() uint32 {
  427. if !c.RDTSCP() {
  428. return 0
  429. }
  430. _, _, ecx, _ := rdtscpAsm()
  431. return ecx
  432. }
  433. // LogicalCPU will return the Logical CPU the code is currently executing on.
  434. // This is likely to change when the OS re-schedules the running thread
  435. // to another CPU.
  436. // If the current core cannot be detected, -1 will be returned.
  437. func (c CPUInfo) LogicalCPU() int {
  438. if c.maxFunc < 1 {
  439. return -1
  440. }
  441. _, ebx, _, _ := cpuid(1)
  442. return int(ebx >> 24)
  443. }
  444. // VM Will return true if the cpu id indicates we are in
  445. // a virtual machine. This is only a hint, and will very likely
  446. // have many false negatives.
  447. func (c CPUInfo) VM() bool {
  448. switch c.VendorID {
  449. case MSVM, KVM, VMware, XenHVM:
  450. return true
  451. }
  452. return false
  453. }
// Flags contains detected CPU features and characteristics as a bit set;
// each bit corresponds to one of the feature constants (CMOV, NX, ...).
type Flags uint64
  456. // String returns a string representation of the detected
  457. // CPU features.
  458. func (f Flags) String() string {
  459. return strings.Join(f.Strings(), ",")
  460. }
  461. // Strings returns and array of the detected features.
  462. func (f Flags) Strings() []string {
  463. s := support()
  464. r := make([]string, 0, 20)
  465. for i := uint(0); i < 64; i++ {
  466. key := Flags(1 << i)
  467. val := flagNames[key]
  468. if s&key != 0 {
  469. r = append(r, val)
  470. }
  471. }
  472. return r
  473. }
  474. func maxExtendedFunction() uint32 {
  475. eax, _, _, _ := cpuid(0x80000000)
  476. return eax
  477. }
  478. func maxFunctionID() uint32 {
  479. a, _, _, _ := cpuid(0)
  480. return a
  481. }
  482. func isHypervisorActive() bool {
  483. _, _, i, _ := cpuid(0x1)
  484. return i&(1<<31) != 0
  485. }
  486. func getHypervisorCpuid(ax uint32) string {
  487. _, b, c, d := cpuid(ax)
  488. name := strings.TrimRight(string(valAsString(b, c, d)), "\000")
  489. return name
  490. }
  491. // see https://people.redhat.com/~rjones/virt-what/ for how full coverage is done
  492. // Vbox etc will need (optional - only do if root) dmi access
  493. func hypervisorName() string {
  494. if !isHypervisorActive() {
  495. return ""
  496. }
  497. // KVM has been caught to move its real signature to this leaf, and put something completely different in the
  498. // standard location. So this leaf must be checked first.
  499. // Sven removed it - in one test system, this leaf returns garbage :(
  500. //if hv := getHypervisorCpuid(0x40000100); hv != "" {
  501. // return hv
  502. //}
  503. return getHypervisorCpuid(0x40000000)
  504. }
// hvmap translates raw hypervisor signature strings, as returned by
// hypervisorName, into short lowercase names. Signatures that are not listed
// here are passed through unchanged by hypervisorVendorName.
// Source: https://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
var hvmap = map[string]string{
	"bhyve bhyve ": "bhyve",
	"KVMKVMKVM":    "kvm",
	"Microsoft Hv": "hyperv",
	"VMwareVMware": "vmware",
	"XenVMMXenVMM": "xenhvm",
}
  513. func hypervisorVendorName() string {
  514. name := hypervisorName()
  515. if n, ok := hvmap[name]; ok {
  516. return n
  517. }
  518. return name
  519. }
  520. func brandName() string {
  521. if maxExtendedFunction() >= 0x80000004 {
  522. v := make([]uint32, 0, 48)
  523. for i := uint32(0); i < 3; i++ {
  524. a, b, c, d := cpuid(0x80000002 + i)
  525. v = append(v, a, b, c, d)
  526. }
  527. return strings.Trim(string(valAsString(v...)), " ")
  528. }
  529. return "unknown"
  530. }
  531. func threadsPerCore() int {
  532. mfi := maxFunctionID()
  533. if mfi < 0x4 || vendorID() != Intel {
  534. return 1
  535. }
  536. if mfi < 0xb {
  537. _, b, _, d := cpuid(1)
  538. if (d & (1 << 28)) != 0 {
  539. // v will contain logical core count
  540. v := (b >> 16) & 255
  541. if v > 1 {
  542. a4, _, _, _ := cpuid(4)
  543. // physical cores
  544. v2 := (a4 >> 26) + 1
  545. if v2 > 0 {
  546. return int(v) / int(v2)
  547. }
  548. }
  549. }
  550. return 1
  551. }
  552. _, b, _, _ := cpuidex(0xb, 0)
  553. if b&0xffff == 0 {
  554. return 1
  555. }
  556. return int(b & 0xffff)
  557. }
  558. func logicalCores() int {
  559. mfi := maxFunctionID()
  560. switch vendorID() {
  561. case Intel:
  562. // Use this on old Intel processors
  563. if mfi < 0xb {
  564. if mfi < 1 {
  565. return 0
  566. }
  567. // CPUID.1:EBX[23:16] represents the maximum number of addressable IDs (initial APIC ID)
  568. // that can be assigned to logical processors in a physical package.
  569. // The value may not be the same as the number of logical processors that are present in the hardware of a physical package.
  570. _, ebx, _, _ := cpuid(1)
  571. logical := (ebx >> 16) & 0xff
  572. return int(logical)
  573. }
  574. _, b, _, _ := cpuidex(0xb, 1)
  575. return int(b & 0xffff)
  576. case AMD:
  577. _, b, _, _ := cpuid(1)
  578. return int((b >> 16) & 0xff)
  579. default:
  580. return 0
  581. }
  582. }
  583. func familyModel() (int, int) {
  584. if maxFunctionID() < 0x1 {
  585. return 0, 0
  586. }
  587. eax, _, _, _ := cpuid(1)
  588. family := ((eax >> 8) & 0xf) + ((eax >> 20) & 0xff)
  589. model := ((eax >> 4) & 0xf) + ((eax >> 12) & 0xf0)
  590. return int(family), int(model)
  591. }
  592. func physicalCores() int {
  593. switch vendorID() {
  594. case Intel:
  595. return logicalCores() / threadsPerCore()
  596. case AMD:
  597. if maxExtendedFunction() >= 0x80000008 {
  598. _, _, c, _ := cpuid(0x80000008)
  599. return int(c&0xff) + 1
  600. }
  601. }
  602. return 0
  603. }
// vendorMapping translates the 12-character vendor string read by vendorID
// into a Vendor constant. Strings that are not listed map to Other.
// Excerpt from http://en.wikipedia.org/wiki/CPUID#EAX.3D0:_Get_vendor_ID
var vendorMapping = map[string]Vendor{
	"AMDisbetter!": AMD,
	"AuthenticAMD": AMD,
	"CentaurHauls": VIA,
	"GenuineIntel": Intel,
	"TransmetaCPU": Transmeta,
	"GenuineTMx86": Transmeta,
	"Geode by NSC": NSC,
	"VIA VIA VIA ": VIA,
	"KVMKVMKVMKVM": KVM,
	"Microsoft Hv": MSVM,
	"VMwareVMware": VMware,
	"XenVMMXenVMM": XenHVM,
}
  619. func vendorID() Vendor {
  620. _, b, c, d := cpuid(0)
  621. v := valAsString(b, d, c)
  622. vend, ok := vendorMapping[string(v)]
  623. if !ok {
  624. return Other
  625. }
  626. return vend
  627. }
  628. func cacheLine() int {
  629. if maxFunctionID() < 0x1 {
  630. return 0
  631. }
  632. _, ebx, _, _ := cpuid(1)
  633. cache := (ebx & 0xff00) >> 5 // cflush size
  634. if cache == 0 && maxExtendedFunction() >= 0x80000006 {
  635. _, _, ecx, _ := cpuid(0x80000006)
  636. cache = ecx & 0xff // cacheline size
  637. }
  638. // TODO: Read from Cache and TLB Information
  639. return int(cache)
  640. }
  641. func (c *CPUInfo) cacheSize() {
  642. c.Cache.L1D = -1
  643. c.Cache.L1I = -1
  644. c.Cache.L2 = -1
  645. c.Cache.L3 = -1
  646. vendor := vendorID()
  647. switch vendor {
  648. case Intel:
  649. if maxFunctionID() < 4 {
  650. return
  651. }
  652. for i := uint32(0); ; i++ {
  653. eax, ebx, ecx, _ := cpuidex(4, i)
  654. cacheType := eax & 15
  655. if cacheType == 0 {
  656. break
  657. }
  658. cacheLevel := (eax >> 5) & 7
  659. coherency := int(ebx&0xfff) + 1
  660. partitions := int((ebx>>12)&0x3ff) + 1
  661. associativity := int((ebx>>22)&0x3ff) + 1
  662. sets := int(ecx) + 1
  663. size := associativity * partitions * coherency * sets
  664. switch cacheLevel {
  665. case 1:
  666. if cacheType == 1 {
  667. // 1 = Data Cache
  668. c.Cache.L1D = size
  669. } else if cacheType == 2 {
  670. // 2 = Instruction Cache
  671. c.Cache.L1I = size
  672. } else {
  673. if c.Cache.L1D < 0 {
  674. c.Cache.L1I = size
  675. }
  676. if c.Cache.L1I < 0 {
  677. c.Cache.L1I = size
  678. }
  679. }
  680. case 2:
  681. c.Cache.L2 = size
  682. case 3:
  683. c.Cache.L3 = size
  684. }
  685. }
  686. case AMD:
  687. // Untested.
  688. if maxExtendedFunction() < 0x80000005 {
  689. return
  690. }
  691. _, _, ecx, edx := cpuid(0x80000005)
  692. c.Cache.L1D = int(((ecx >> 24) & 0xFF) * 1024)
  693. c.Cache.L1I = int(((edx >> 24) & 0xFF) * 1024)
  694. if maxExtendedFunction() < 0x80000006 {
  695. return
  696. }
  697. _, _, ecx, _ = cpuid(0x80000006)
  698. c.Cache.L2 = int(((ecx >> 16) & 0xFFFF) * 1024)
  699. }
  700. return
  701. }
// SGXSupport describes the Software Guard Extensions capabilities of the CPU
// as collected by sgx. When Available is false all other fields are zero.
type SGXSupport struct {
	Available           bool  // The SGX feature flag is set
	SGX1Supported       bool  // Bit 0 of EAX from CPUID function 0x12, sub-function 0
	SGX2Supported       bool  // Bit 1 of EAX from CPUID function 0x12, sub-function 0
	MaxEnclaveSizeNot64 int64 // 2 to the power of EDX[7:0]; presumably the maximum enclave size outside 64-bit mode
	MaxEnclaveSize64    int64 // 2 to the power of EDX[15:8]; presumably the maximum enclave size in 64-bit mode
}
  709. func sgx(available bool) (rval SGXSupport) {
  710. rval.Available = available
  711. if !available {
  712. return
  713. }
  714. a, _, _, d := cpuidex(0x12, 0)
  715. rval.SGX1Supported = a&0x01 != 0
  716. rval.SGX2Supported = a&0x02 != 0
  717. rval.MaxEnclaveSizeNot64 = 1 << (d & 0xFF) // pow 2
  718. rval.MaxEnclaveSize64 = 1 << ((d >> 8) & 0xFF) // pow 2
  719. return
  720. }
  721. func support() Flags {
  722. mfi := maxFunctionID()
  723. vend := vendorID()
  724. if mfi < 0x1 {
  725. return 0
  726. }
  727. rval := uint64(0)
  728. _, _, c, d := cpuid(1)
  729. if (d & (1 << 15)) != 0 {
  730. rval |= CMOV
  731. }
  732. if (d & (1 << 23)) != 0 {
  733. rval |= MMX
  734. }
  735. if (d & (1 << 25)) != 0 {
  736. rval |= MMXEXT
  737. }
  738. if (d & (1 << 25)) != 0 {
  739. rval |= SSE
  740. }
  741. if (d & (1 << 26)) != 0 {
  742. rval |= SSE2
  743. }
  744. if (c & 1) != 0 {
  745. rval |= SSE3
  746. }
  747. if (c & 0x00000200) != 0 {
  748. rval |= SSSE3
  749. }
  750. if (c & 0x00080000) != 0 {
  751. rval |= SSE4
  752. }
  753. if (c & 0x00100000) != 0 {
  754. rval |= SSE42
  755. }
  756. if (c & (1 << 25)) != 0 {
  757. rval |= AESNI
  758. }
  759. if (c & (1 << 1)) != 0 {
  760. rval |= CLMUL
  761. }
  762. if c&(1<<23) != 0 {
  763. rval |= POPCNT
  764. }
  765. if c&(1<<30) != 0 {
  766. rval |= RDRAND
  767. }
  768. if c&(1<<29) != 0 {
  769. rval |= F16C
  770. }
  771. if c&(1<<13) != 0 {
  772. rval |= CX16
  773. }
  774. if vend == Intel && (d&(1<<28)) != 0 && mfi >= 4 {
  775. if threadsPerCore() > 1 {
  776. rval |= HTT
  777. }
  778. }
  779. // Check XGETBV, OXSAVE and AVX bits
  780. if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 {
  781. // Check for OS support
  782. eax, _ := xgetbv(0)
  783. if (eax & 0x6) == 0x6 {
  784. rval |= AVX
  785. if (c & 0x00001000) != 0 {
  786. rval |= FMA3
  787. }
  788. }
  789. }
  790. // Check AVX2, AVX2 requires OS support, but BMI1/2 don't.
  791. if mfi >= 7 {
  792. _, ebx, ecx, _ := cpuidex(7, 0)
  793. if (rval&AVX) != 0 && (ebx&0x00000020) != 0 {
  794. rval |= AVX2
  795. }
  796. if (ebx & 0x00000008) != 0 {
  797. rval |= BMI1
  798. if (ebx & 0x00000100) != 0 {
  799. rval |= BMI2
  800. }
  801. }
  802. if ebx&(1<<2) != 0 {
  803. rval |= SGX
  804. }
  805. if ebx&(1<<4) != 0 {
  806. rval |= HLE
  807. }
  808. if ebx&(1<<9) != 0 {
  809. rval |= ERMS
  810. }
  811. if ebx&(1<<11) != 0 {
  812. rval |= RTM
  813. }
  814. if ebx&(1<<14) != 0 {
  815. rval |= MPX
  816. }
  817. if ebx&(1<<18) != 0 {
  818. rval |= RDSEED
  819. }
  820. if ebx&(1<<19) != 0 {
  821. rval |= ADX
  822. }
  823. if ebx&(1<<29) != 0 {
  824. rval |= SHA
  825. }
  826. // Only detect AVX-512 features if XGETBV is supported
  827. if c&((1<<26)|(1<<27)) == (1<<26)|(1<<27) {
  828. // Check for OS support
  829. eax, _ := xgetbv(0)
  830. // Verify that XCR0[7:5] = ‘111b’ (OPMASK state, upper 256-bit of ZMM0-ZMM15 and
  831. // ZMM16-ZMM31 state are enabled by OS)
  832. /// and that XCR0[2:1] = ‘11b’ (XMM state and YMM state are enabled by OS).
  833. if (eax>>5)&7 == 7 && (eax>>1)&3 == 3 {
  834. if ebx&(1<<16) != 0 {
  835. rval |= AVX512F
  836. }
  837. if ebx&(1<<17) != 0 {
  838. rval |= AVX512DQ
  839. }
  840. if ebx&(1<<21) != 0 {
  841. rval |= AVX512IFMA
  842. }
  843. if ebx&(1<<26) != 0 {
  844. rval |= AVX512PF
  845. }
  846. if ebx&(1<<27) != 0 {
  847. rval |= AVX512ER
  848. }
  849. if ebx&(1<<28) != 0 {
  850. rval |= AVX512CD
  851. }
  852. if ebx&(1<<30) != 0 {
  853. rval |= AVX512BW
  854. }
  855. if ebx&(1<<31) != 0 {
  856. rval |= AVX512VL
  857. }
  858. // ecx
  859. if ecx&(1<<1) != 0 {
  860. rval |= AVX512VBMI
  861. }
  862. }
  863. }
  864. }
  865. if maxExtendedFunction() >= 0x80000001 {
  866. _, _, c, d := cpuid(0x80000001)
  867. if (c & (1 << 5)) != 0 {
  868. rval |= LZCNT
  869. rval |= POPCNT
  870. }
  871. if (d & (1 << 31)) != 0 {
  872. rval |= AMD3DNOW
  873. }
  874. if (d & (1 << 30)) != 0 {
  875. rval |= AMD3DNOWEXT
  876. }
  877. if (d & (1 << 23)) != 0 {
  878. rval |= MMX
  879. }
  880. if (d & (1 << 22)) != 0 {
  881. rval |= MMXEXT
  882. }
  883. if (c & (1 << 6)) != 0 {
  884. rval |= SSE4A
  885. }
  886. if d&(1<<20) != 0 {
  887. rval |= NX
  888. }
  889. if d&(1<<27) != 0 {
  890. rval |= RDTSCP
  891. }
  892. /* Allow for selectively disabling SSE2 functions on AMD processors
  893. with SSE2 support but not SSE4a. This includes Athlon64, some
  894. Opteron, and some Sempron processors. MMX, SSE, or 3DNow! are faster
  895. than SSE2 often enough to utilize this special-case flag.
  896. AV_CPU_FLAG_SSE2 and AV_CPU_FLAG_SSE2SLOW are both set in this case
  897. so that SSE2 is used unless explicitly disabled by checking
  898. AV_CPU_FLAG_SSE2SLOW. */
  899. if vendorID() != Intel &&
  900. rval&SSE2 != 0 && (c&0x00000040) == 0 {
  901. rval |= SSE2SLOW
  902. }
  903. /* XOP and FMA4 use the AVX instruction coding scheme, so they can't be
  904. * used unless the OS has AVX support. */
  905. if (rval & AVX) != 0 {
  906. if (c & 0x00000800) != 0 {
  907. rval |= XOP
  908. }
  909. if (c & 0x00010000) != 0 {
  910. rval |= FMA4
  911. }
  912. }
  913. if vendorID() == Intel {
  914. family, model := familyModel()
  915. if family == 6 && (model == 9 || model == 13 || model == 14) {
  916. /* 6/9 (pentium-m "banias"), 6/13 (pentium-m "dothan"), and
  917. * 6/14 (core1 "yonah") theoretically support sse2, but it's
  918. * usually slower than mmx. */
  919. if (rval & SSE2) != 0 {
  920. rval |= SSE2SLOW
  921. }
  922. if (rval & SSE3) != 0 {
  923. rval |= SSE3SLOW
  924. }
  925. }
  926. /* The Atom processor has SSSE3 support, which is useful in many cases,
  927. * but sometimes the SSSE3 version is slower than the SSE2 equivalent
  928. * on the Atom, but is generally faster on other processors supporting
  929. * SSSE3. This flag allows for selectively disabling certain SSSE3
  930. * functions on the Atom. */
  931. if family == 6 && model == 28 {
  932. rval |= ATOM
  933. }
  934. }
  935. }
  936. return Flags(rval)
  937. }
  938. func valAsString(values ...uint32) []byte {
  939. r := make([]byte, 4*len(values))
  940. for i, v := range values {
  941. dst := r[i*4:]
  942. dst[0] = byte(v & 0xff)
  943. dst[1] = byte((v >> 8) & 0xff)
  944. dst[2] = byte((v >> 16) & 0xff)
  945. dst[3] = byte((v >> 24) & 0xff)
  946. switch {
  947. case dst[0] == 0:
  948. return r[:i*4]
  949. case dst[1] == 0:
  950. return r[:i*4+1]
  951. case dst[2] == 0:
  952. return r[:i*4+2]
  953. case dst[3] == 0:
  954. return r[:i*4+3]
  955. }
  956. }
  957. return r
  958. }