xref: /aosp_15_r20/external/zstd/lib/common/cpu.h (revision 01826a4963a0d8a59bc3812d29bdf0fb76416722)
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */
10*01826a49SYabin Cui 
11*01826a49SYabin Cui #ifndef ZSTD_COMMON_CPU_H
12*01826a49SYabin Cui #define ZSTD_COMMON_CPU_H
13*01826a49SYabin Cui 
/**
 * Implementation taken from folly/CpuId.h
 * https://github.com/facebook/folly/blob/master/folly/CpuId.h
 */
18*01826a49SYabin Cui 
19*01826a49SYabin Cui #include "mem.h"
20*01826a49SYabin Cui 
21*01826a49SYabin Cui #ifdef _MSC_VER
22*01826a49SYabin Cui #include <intrin.h>
23*01826a49SYabin Cui #endif
24*01826a49SYabin Cui 
25*01826a49SYabin Cui typedef struct {
26*01826a49SYabin Cui     U32 f1c;
27*01826a49SYabin Cui     U32 f1d;
28*01826a49SYabin Cui     U32 f7b;
29*01826a49SYabin Cui     U32 f7c;
30*01826a49SYabin Cui } ZSTD_cpuid_t;
31*01826a49SYabin Cui 
ZSTD_cpuid(void)32*01826a49SYabin Cui MEM_STATIC ZSTD_cpuid_t ZSTD_cpuid(void) {
33*01826a49SYabin Cui     U32 f1c = 0;
34*01826a49SYabin Cui     U32 f1d = 0;
35*01826a49SYabin Cui     U32 f7b = 0;
36*01826a49SYabin Cui     U32 f7c = 0;
37*01826a49SYabin Cui #if defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
38*01826a49SYabin Cui #if !defined(__clang__)
39*01826a49SYabin Cui     int reg[4];
40*01826a49SYabin Cui     __cpuid((int*)reg, 0);
41*01826a49SYabin Cui     {
42*01826a49SYabin Cui         int const n = reg[0];
43*01826a49SYabin Cui         if (n >= 1) {
44*01826a49SYabin Cui             __cpuid((int*)reg, 1);
45*01826a49SYabin Cui             f1c = (U32)reg[2];
46*01826a49SYabin Cui             f1d = (U32)reg[3];
47*01826a49SYabin Cui         }
48*01826a49SYabin Cui         if (n >= 7) {
49*01826a49SYabin Cui             __cpuidex((int*)reg, 7, 0);
50*01826a49SYabin Cui             f7b = (U32)reg[1];
51*01826a49SYabin Cui             f7c = (U32)reg[2];
52*01826a49SYabin Cui         }
53*01826a49SYabin Cui     }
54*01826a49SYabin Cui #else
55*01826a49SYabin Cui     /* Clang compiler has a bug (fixed in https://reviews.llvm.org/D101338) in
56*01826a49SYabin Cui      * which the `__cpuid` intrinsic does not save and restore `rbx` as it needs
57*01826a49SYabin Cui      * to due to being a reserved register. So in that case, do the `cpuid`
58*01826a49SYabin Cui      * ourselves. Clang supports inline assembly anyway.
59*01826a49SYabin Cui      */
60*01826a49SYabin Cui     U32 n;
61*01826a49SYabin Cui     __asm__(
62*01826a49SYabin Cui         "pushq %%rbx\n\t"
63*01826a49SYabin Cui         "cpuid\n\t"
64*01826a49SYabin Cui         "popq %%rbx\n\t"
65*01826a49SYabin Cui         : "=a"(n)
66*01826a49SYabin Cui         : "a"(0)
67*01826a49SYabin Cui         : "rcx", "rdx");
68*01826a49SYabin Cui     if (n >= 1) {
69*01826a49SYabin Cui       U32 f1a;
70*01826a49SYabin Cui       __asm__(
71*01826a49SYabin Cui           "pushq %%rbx\n\t"
72*01826a49SYabin Cui           "cpuid\n\t"
73*01826a49SYabin Cui           "popq %%rbx\n\t"
74*01826a49SYabin Cui           : "=a"(f1a), "=c"(f1c), "=d"(f1d)
75*01826a49SYabin Cui           : "a"(1)
76*01826a49SYabin Cui           :);
77*01826a49SYabin Cui     }
78*01826a49SYabin Cui     if (n >= 7) {
79*01826a49SYabin Cui       __asm__(
80*01826a49SYabin Cui           "pushq %%rbx\n\t"
81*01826a49SYabin Cui           "cpuid\n\t"
82*01826a49SYabin Cui           "movq %%rbx, %%rax\n\t"
83*01826a49SYabin Cui           "popq %%rbx"
84*01826a49SYabin Cui           : "=a"(f7b), "=c"(f7c)
85*01826a49SYabin Cui           : "a"(7), "c"(0)
86*01826a49SYabin Cui           : "rdx");
87*01826a49SYabin Cui     }
88*01826a49SYabin Cui #endif
89*01826a49SYabin Cui #elif defined(__i386__) && defined(__PIC__) && !defined(__clang__) && defined(__GNUC__)
90*01826a49SYabin Cui     /* The following block like the normal cpuid branch below, but gcc
91*01826a49SYabin Cui      * reserves ebx for use of its pic register so we must specially
92*01826a49SYabin Cui      * handle the save and restore to avoid clobbering the register
93*01826a49SYabin Cui      */
94*01826a49SYabin Cui     U32 n;
95*01826a49SYabin Cui     __asm__(
96*01826a49SYabin Cui         "pushl %%ebx\n\t"
97*01826a49SYabin Cui         "cpuid\n\t"
98*01826a49SYabin Cui         "popl %%ebx\n\t"
99*01826a49SYabin Cui         : "=a"(n)
100*01826a49SYabin Cui         : "a"(0)
101*01826a49SYabin Cui         : "ecx", "edx");
102*01826a49SYabin Cui     if (n >= 1) {
103*01826a49SYabin Cui       U32 f1a;
104*01826a49SYabin Cui       __asm__(
105*01826a49SYabin Cui           "pushl %%ebx\n\t"
106*01826a49SYabin Cui           "cpuid\n\t"
107*01826a49SYabin Cui           "popl %%ebx\n\t"
108*01826a49SYabin Cui           : "=a"(f1a), "=c"(f1c), "=d"(f1d)
109*01826a49SYabin Cui           : "a"(1));
110*01826a49SYabin Cui     }
111*01826a49SYabin Cui     if (n >= 7) {
112*01826a49SYabin Cui       __asm__(
113*01826a49SYabin Cui           "pushl %%ebx\n\t"
114*01826a49SYabin Cui           "cpuid\n\t"
115*01826a49SYabin Cui           "movl %%ebx, %%eax\n\t"
116*01826a49SYabin Cui           "popl %%ebx"
117*01826a49SYabin Cui           : "=a"(f7b), "=c"(f7c)
118*01826a49SYabin Cui           : "a"(7), "c"(0)
119*01826a49SYabin Cui           : "edx");
120*01826a49SYabin Cui     }
121*01826a49SYabin Cui #elif defined(__x86_64__) || defined(_M_X64) || defined(__i386__)
122*01826a49SYabin Cui     U32 n;
123*01826a49SYabin Cui     __asm__("cpuid" : "=a"(n) : "a"(0) : "ebx", "ecx", "edx");
124*01826a49SYabin Cui     if (n >= 1) {
125*01826a49SYabin Cui       U32 f1a;
126*01826a49SYabin Cui       __asm__("cpuid" : "=a"(f1a), "=c"(f1c), "=d"(f1d) : "a"(1) : "ebx");
127*01826a49SYabin Cui     }
128*01826a49SYabin Cui     if (n >= 7) {
129*01826a49SYabin Cui       U32 f7a;
130*01826a49SYabin Cui       __asm__("cpuid"
131*01826a49SYabin Cui               : "=a"(f7a), "=b"(f7b), "=c"(f7c)
132*01826a49SYabin Cui               : "a"(7), "c"(0)
133*01826a49SYabin Cui               : "edx");
134*01826a49SYabin Cui     }
135*01826a49SYabin Cui #endif
136*01826a49SYabin Cui     {
137*01826a49SYabin Cui         ZSTD_cpuid_t cpuid;
138*01826a49SYabin Cui         cpuid.f1c = f1c;
139*01826a49SYabin Cui         cpuid.f1d = f1d;
140*01826a49SYabin Cui         cpuid.f7b = f7b;
141*01826a49SYabin Cui         cpuid.f7c = f7c;
142*01826a49SYabin Cui         return cpuid;
143*01826a49SYabin Cui     }
144*01826a49SYabin Cui }
145*01826a49SYabin Cui 
146*01826a49SYabin Cui #define X(name, r, bit)                                                        \
147*01826a49SYabin Cui   MEM_STATIC int ZSTD_cpuid_##name(ZSTD_cpuid_t const cpuid) {                 \
148*01826a49SYabin Cui     return ((cpuid.r) & (1U << bit)) != 0;                                     \
149*01826a49SYabin Cui   }
150*01826a49SYabin Cui 
151*01826a49SYabin Cui /* cpuid(1): Processor Info and Feature Bits. */
152*01826a49SYabin Cui #define C(name, bit) X(name, f1c, bit)
153*01826a49SYabin Cui   C(sse3, 0)
154*01826a49SYabin Cui   C(pclmuldq, 1)
155*01826a49SYabin Cui   C(dtes64, 2)
156*01826a49SYabin Cui   C(monitor, 3)
157*01826a49SYabin Cui   C(dscpl, 4)
158*01826a49SYabin Cui   C(vmx, 5)
159*01826a49SYabin Cui   C(smx, 6)
160*01826a49SYabin Cui   C(eist, 7)
161*01826a49SYabin Cui   C(tm2, 8)
162*01826a49SYabin Cui   C(ssse3, 9)
163*01826a49SYabin Cui   C(cnxtid, 10)
164*01826a49SYabin Cui   C(fma, 12)
165*01826a49SYabin Cui   C(cx16, 13)
166*01826a49SYabin Cui   C(xtpr, 14)
167*01826a49SYabin Cui   C(pdcm, 15)
168*01826a49SYabin Cui   C(pcid, 17)
169*01826a49SYabin Cui   C(dca, 18)
170*01826a49SYabin Cui   C(sse41, 19)
171*01826a49SYabin Cui   C(sse42, 20)
172*01826a49SYabin Cui   C(x2apic, 21)
173*01826a49SYabin Cui   C(movbe, 22)
174*01826a49SYabin Cui   C(popcnt, 23)
175*01826a49SYabin Cui   C(tscdeadline, 24)
176*01826a49SYabin Cui   C(aes, 25)
177*01826a49SYabin Cui   C(xsave, 26)
178*01826a49SYabin Cui   C(osxsave, 27)
179*01826a49SYabin Cui   C(avx, 28)
180*01826a49SYabin Cui   C(f16c, 29)
181*01826a49SYabin Cui   C(rdrand, 30)
182*01826a49SYabin Cui #undef C
183*01826a49SYabin Cui #define D(name, bit) X(name, f1d, bit)
184*01826a49SYabin Cui   D(fpu, 0)
185*01826a49SYabin Cui   D(vme, 1)
186*01826a49SYabin Cui   D(de, 2)
187*01826a49SYabin Cui   D(pse, 3)
188*01826a49SYabin Cui   D(tsc, 4)
189*01826a49SYabin Cui   D(msr, 5)
190*01826a49SYabin Cui   D(pae, 6)
191*01826a49SYabin Cui   D(mce, 7)
192*01826a49SYabin Cui   D(cx8, 8)
193*01826a49SYabin Cui   D(apic, 9)
194*01826a49SYabin Cui   D(sep, 11)
195*01826a49SYabin Cui   D(mtrr, 12)
196*01826a49SYabin Cui   D(pge, 13)
197*01826a49SYabin Cui   D(mca, 14)
198*01826a49SYabin Cui   D(cmov, 15)
199*01826a49SYabin Cui   D(pat, 16)
200*01826a49SYabin Cui   D(pse36, 17)
201*01826a49SYabin Cui   D(psn, 18)
202*01826a49SYabin Cui   D(clfsh, 19)
203*01826a49SYabin Cui   D(ds, 21)
204*01826a49SYabin Cui   D(acpi, 22)
205*01826a49SYabin Cui   D(mmx, 23)
206*01826a49SYabin Cui   D(fxsr, 24)
207*01826a49SYabin Cui   D(sse, 25)
208*01826a49SYabin Cui   D(sse2, 26)
209*01826a49SYabin Cui   D(ss, 27)
210*01826a49SYabin Cui   D(htt, 28)
211*01826a49SYabin Cui   D(tm, 29)
212*01826a49SYabin Cui   D(pbe, 31)
213*01826a49SYabin Cui #undef D
214*01826a49SYabin Cui 
215*01826a49SYabin Cui /* cpuid(7): Extended Features. */
216*01826a49SYabin Cui #define B(name, bit) X(name, f7b, bit)
217*01826a49SYabin Cui   B(bmi1, 3)
218*01826a49SYabin Cui   B(hle, 4)
219*01826a49SYabin Cui   B(avx2, 5)
220*01826a49SYabin Cui   B(smep, 7)
221*01826a49SYabin Cui   B(bmi2, 8)
222*01826a49SYabin Cui   B(erms, 9)
223*01826a49SYabin Cui   B(invpcid, 10)
224*01826a49SYabin Cui   B(rtm, 11)
225*01826a49SYabin Cui   B(mpx, 14)
226*01826a49SYabin Cui   B(avx512f, 16)
227*01826a49SYabin Cui   B(avx512dq, 17)
228*01826a49SYabin Cui   B(rdseed, 18)
229*01826a49SYabin Cui   B(adx, 19)
230*01826a49SYabin Cui   B(smap, 20)
231*01826a49SYabin Cui   B(avx512ifma, 21)
232*01826a49SYabin Cui   B(pcommit, 22)
233*01826a49SYabin Cui   B(clflushopt, 23)
234*01826a49SYabin Cui   B(clwb, 24)
235*01826a49SYabin Cui   B(avx512pf, 26)
236*01826a49SYabin Cui   B(avx512er, 27)
237*01826a49SYabin Cui   B(avx512cd, 28)
238*01826a49SYabin Cui   B(sha, 29)
239*01826a49SYabin Cui   B(avx512bw, 30)
240*01826a49SYabin Cui   B(avx512vl, 31)
241*01826a49SYabin Cui #undef B
242*01826a49SYabin Cui #define C(name, bit) X(name, f7c, bit)
243*01826a49SYabin Cui   C(prefetchwt1, 0)
244*01826a49SYabin Cui   C(avx512vbmi, 1)
245*01826a49SYabin Cui #undef C
246*01826a49SYabin Cui 
247*01826a49SYabin Cui #undef X
248*01826a49SYabin Cui 
249*01826a49SYabin Cui #endif /* ZSTD_COMMON_CPU_H */
250