1*2b54f0dbSXin Li #include <stdint.h>
2*2b54f0dbSXin Li #include <stddef.h>
3*2b54f0dbSXin Li #include <stdlib.h>
4*2b54f0dbSXin Li #include <string.h>
5*2b54f0dbSXin Li
6*2b54f0dbSXin Li #include <cpuinfo.h>
7*2b54f0dbSXin Li #include <x86/api.h>
8*2b54f0dbSXin Li #include <cpuinfo/internal-api.h>
9*2b54f0dbSXin Li #include <cpuinfo/log.h>
10*2b54f0dbSXin Li
11*2b54f0dbSXin Li #include <windows.h>
12*2b54f0dbSXin Li
13*2b54f0dbSXin Li #ifdef __GNUC__
14*2b54f0dbSXin Li #define CPUINFO_ALLOCA __builtin_alloca
15*2b54f0dbSXin Li #else
16*2b54f0dbSXin Li #define CPUINFO_ALLOCA _alloca
17*2b54f0dbSXin Li #endif
18*2b54f0dbSXin Li
19*2b54f0dbSXin Li
/*
 * Builds a 32-bit mask with the `bits` least-significant bits set.
 *
 * bit_mask(0) is 0 and bit_mask(5) is 0x1F. A width of 32 or more produces an
 * all-ones mask: shifting a 32-bit value by its full width (or more) is
 * undefined behavior in C, so that case is handled explicitly instead of
 * being passed to the shift.
 */
static inline uint32_t bit_mask(uint32_t bits) {
	if (bits >= 32) {
		return UINT32_MAX;
	}
	return (UINT32_C(1) << bits) - UINT32_C(1);
}
23*2b54f0dbSXin Li
low_index_from_kaffinity(KAFFINITY kaffinity)24*2b54f0dbSXin Li static inline uint32_t low_index_from_kaffinity(KAFFINITY kaffinity) {
25*2b54f0dbSXin Li #if defined(_M_X64) || defined(_M_AMD64)
26*2b54f0dbSXin Li unsigned long index;
27*2b54f0dbSXin Li _BitScanForward64(&index, (unsigned __int64) kaffinity);
28*2b54f0dbSXin Li return (uint32_t) index;
29*2b54f0dbSXin Li #elif defined(_M_IX86)
30*2b54f0dbSXin Li unsigned long index;
31*2b54f0dbSXin Li _BitScanForward(&index, (unsigned long) kaffinity);
32*2b54f0dbSXin Li return (uint32_t) index;
33*2b54f0dbSXin Li #else
34*2b54f0dbSXin Li #error Platform-specific implementation required
35*2b54f0dbSXin Li #endif
36*2b54f0dbSXin Li }
37*2b54f0dbSXin Li
cpuinfo_x86_count_caches(uint32_t processors_count,const struct cpuinfo_processor * processors,const struct cpuinfo_x86_processor * x86_processor,uint32_t * l1i_count_ptr,uint32_t * l1d_count_ptr,uint32_t * l2_count_ptr,uint32_t * l3_count_ptr,uint32_t * l4_count_ptr)38*2b54f0dbSXin Li static void cpuinfo_x86_count_caches(
39*2b54f0dbSXin Li uint32_t processors_count,
40*2b54f0dbSXin Li const struct cpuinfo_processor* processors,
41*2b54f0dbSXin Li const struct cpuinfo_x86_processor* x86_processor,
42*2b54f0dbSXin Li uint32_t* l1i_count_ptr,
43*2b54f0dbSXin Li uint32_t* l1d_count_ptr,
44*2b54f0dbSXin Li uint32_t* l2_count_ptr,
45*2b54f0dbSXin Li uint32_t* l3_count_ptr,
46*2b54f0dbSXin Li uint32_t* l4_count_ptr)
47*2b54f0dbSXin Li {
48*2b54f0dbSXin Li uint32_t l1i_count = 0, l1d_count = 0, l2_count = 0, l3_count = 0, l4_count = 0;
49*2b54f0dbSXin Li uint32_t last_l1i_id = UINT32_MAX, last_l1d_id = UINT32_MAX;
50*2b54f0dbSXin Li uint32_t last_l2_id = UINT32_MAX, last_l3_id = UINT32_MAX, last_l4_id = UINT32_MAX;
51*2b54f0dbSXin Li for (uint32_t i = 0; i < processors_count; i++) {
52*2b54f0dbSXin Li const uint32_t apic_id = processors[i].apic_id;
53*2b54f0dbSXin Li cpuinfo_log_debug("APID ID %"PRIu32": logical processor %"PRIu32, apic_id, i);
54*2b54f0dbSXin Li
55*2b54f0dbSXin Li if (x86_processor->cache.l1i.size != 0) {
56*2b54f0dbSXin Li const uint32_t l1i_id = apic_id & ~bit_mask(x86_processor->cache.l1i.apic_bits);
57*2b54f0dbSXin Li if (l1i_id != last_l1i_id) {
58*2b54f0dbSXin Li last_l1i_id = l1i_id;
59*2b54f0dbSXin Li l1i_count++;
60*2b54f0dbSXin Li }
61*2b54f0dbSXin Li }
62*2b54f0dbSXin Li if (x86_processor->cache.l1d.size != 0) {
63*2b54f0dbSXin Li const uint32_t l1d_id = apic_id & ~bit_mask(x86_processor->cache.l1d.apic_bits);
64*2b54f0dbSXin Li if (l1d_id != last_l1d_id) {
65*2b54f0dbSXin Li last_l1d_id = l1d_id;
66*2b54f0dbSXin Li l1d_count++;
67*2b54f0dbSXin Li }
68*2b54f0dbSXin Li }
69*2b54f0dbSXin Li if (x86_processor->cache.l2.size != 0) {
70*2b54f0dbSXin Li const uint32_t l2_id = apic_id & ~bit_mask(x86_processor->cache.l2.apic_bits);
71*2b54f0dbSXin Li if (l2_id != last_l2_id) {
72*2b54f0dbSXin Li last_l2_id = l2_id;
73*2b54f0dbSXin Li l2_count++;
74*2b54f0dbSXin Li }
75*2b54f0dbSXin Li }
76*2b54f0dbSXin Li if (x86_processor->cache.l3.size != 0) {
77*2b54f0dbSXin Li const uint32_t l3_id = apic_id & ~bit_mask(x86_processor->cache.l3.apic_bits);
78*2b54f0dbSXin Li if (l3_id != last_l3_id) {
79*2b54f0dbSXin Li last_l3_id = l3_id;
80*2b54f0dbSXin Li l3_count++;
81*2b54f0dbSXin Li }
82*2b54f0dbSXin Li }
83*2b54f0dbSXin Li if (x86_processor->cache.l4.size != 0) {
84*2b54f0dbSXin Li const uint32_t l4_id = apic_id & ~bit_mask(x86_processor->cache.l4.apic_bits);
85*2b54f0dbSXin Li if (l4_id != last_l4_id) {
86*2b54f0dbSXin Li last_l4_id = l4_id;
87*2b54f0dbSXin Li l4_count++;
88*2b54f0dbSXin Li }
89*2b54f0dbSXin Li }
90*2b54f0dbSXin Li }
91*2b54f0dbSXin Li *l1i_count_ptr = l1i_count;
92*2b54f0dbSXin Li *l1d_count_ptr = l1d_count;
93*2b54f0dbSXin Li *l2_count_ptr = l2_count;
94*2b54f0dbSXin Li *l3_count_ptr = l3_count;
95*2b54f0dbSXin Li *l4_count_ptr = l4_count;
96*2b54f0dbSXin Li }
97*2b54f0dbSXin Li
cpuinfo_x86_windows_is_wine(void)98*2b54f0dbSXin Li static bool cpuinfo_x86_windows_is_wine(void) {
99*2b54f0dbSXin Li HMODULE ntdll = GetModuleHandleW(L"ntdll.dll");
100*2b54f0dbSXin Li if (ntdll == NULL) {
101*2b54f0dbSXin Li return false;
102*2b54f0dbSXin Li }
103*2b54f0dbSXin Li
104*2b54f0dbSXin Li return GetProcAddress(ntdll, "wine_get_version") != NULL;
105*2b54f0dbSXin Li }
106*2b54f0dbSXin Li
cpuinfo_x86_windows_init(PINIT_ONCE init_once,PVOID parameter,PVOID * context)107*2b54f0dbSXin Li BOOL CALLBACK cpuinfo_x86_windows_init(PINIT_ONCE init_once, PVOID parameter, PVOID* context) {
108*2b54f0dbSXin Li struct cpuinfo_processor* processors = NULL;
109*2b54f0dbSXin Li struct cpuinfo_core* cores = NULL;
110*2b54f0dbSXin Li struct cpuinfo_cluster* clusters = NULL;
111*2b54f0dbSXin Li struct cpuinfo_package* packages = NULL;
112*2b54f0dbSXin Li struct cpuinfo_cache* l1i = NULL;
113*2b54f0dbSXin Li struct cpuinfo_cache* l1d = NULL;
114*2b54f0dbSXin Li struct cpuinfo_cache* l2 = NULL;
115*2b54f0dbSXin Li struct cpuinfo_cache* l3 = NULL;
116*2b54f0dbSXin Li struct cpuinfo_cache* l4 = NULL;
117*2b54f0dbSXin Li PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX processor_infos = NULL;
118*2b54f0dbSXin Li
119*2b54f0dbSXin Li HANDLE heap = GetProcessHeap();
120*2b54f0dbSXin Li const bool is_wine = cpuinfo_x86_windows_is_wine();
121*2b54f0dbSXin Li
122*2b54f0dbSXin Li struct cpuinfo_x86_processor x86_processor;
123*2b54f0dbSXin Li ZeroMemory(&x86_processor, sizeof(x86_processor));
124*2b54f0dbSXin Li cpuinfo_x86_init_processor(&x86_processor);
125*2b54f0dbSXin Li char brand_string[48];
126*2b54f0dbSXin Li cpuinfo_x86_normalize_brand_string(x86_processor.brand_string, brand_string);
127*2b54f0dbSXin Li
128*2b54f0dbSXin Li const uint32_t thread_bits_mask = bit_mask(x86_processor.topology.thread_bits_length);
129*2b54f0dbSXin Li const uint32_t core_bits_mask = bit_mask(x86_processor.topology.core_bits_length);
130*2b54f0dbSXin Li const uint32_t package_bits_offset = max(
131*2b54f0dbSXin Li x86_processor.topology.thread_bits_offset + x86_processor.topology.thread_bits_length,
132*2b54f0dbSXin Li x86_processor.topology.core_bits_offset + x86_processor.topology.core_bits_length);
133*2b54f0dbSXin Li
134*2b54f0dbSXin Li /* WINE doesn't implement GetMaximumProcessorGroupCount and aborts when calling it */
135*2b54f0dbSXin Li const uint32_t max_group_count = is_wine ? 1 : (uint32_t) GetMaximumProcessorGroupCount();
136*2b54f0dbSXin Li cpuinfo_log_debug("detected %"PRIu32" processor groups", max_group_count);
137*2b54f0dbSXin Li
138*2b54f0dbSXin Li uint32_t processors_count = 0;
139*2b54f0dbSXin Li uint32_t* processors_per_group = (uint32_t*) CPUINFO_ALLOCA(max_group_count * sizeof(uint32_t));
140*2b54f0dbSXin Li for (uint32_t i = 0; i < max_group_count; i++) {
141*2b54f0dbSXin Li processors_per_group[i] = GetMaximumProcessorCount((WORD) i);
142*2b54f0dbSXin Li cpuinfo_log_debug("detected %"PRIu32" processors in group %"PRIu32,
143*2b54f0dbSXin Li processors_per_group[i], i);
144*2b54f0dbSXin Li processors_count += processors_per_group[i];
145*2b54f0dbSXin Li }
146*2b54f0dbSXin Li
147*2b54f0dbSXin Li uint32_t* processors_before_group = (uint32_t*) CPUINFO_ALLOCA(max_group_count * sizeof(uint32_t));
148*2b54f0dbSXin Li for (uint32_t i = 0, count = 0; i < max_group_count; i++) {
149*2b54f0dbSXin Li processors_before_group[i] = count;
150*2b54f0dbSXin Li cpuinfo_log_debug("detected %"PRIu32" processors before group %"PRIu32,
151*2b54f0dbSXin Li processors_before_group[i], i);
152*2b54f0dbSXin Li count += processors_per_group[i];
153*2b54f0dbSXin Li }
154*2b54f0dbSXin Li
155*2b54f0dbSXin Li processors = HeapAlloc(heap, HEAP_ZERO_MEMORY, processors_count * sizeof(struct cpuinfo_processor));
156*2b54f0dbSXin Li if (processors == NULL) {
157*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors",
158*2b54f0dbSXin Li processors_count * sizeof(struct cpuinfo_processor), processors_count);
159*2b54f0dbSXin Li goto cleanup;
160*2b54f0dbSXin Li }
161*2b54f0dbSXin Li
162*2b54f0dbSXin Li DWORD cores_info_size = 0;
163*2b54f0dbSXin Li if (GetLogicalProcessorInformationEx(RelationProcessorCore, NULL, &cores_info_size) == FALSE) {
164*2b54f0dbSXin Li const DWORD last_error = GetLastError();
165*2b54f0dbSXin Li if (last_error != ERROR_INSUFFICIENT_BUFFER) {
166*2b54f0dbSXin Li cpuinfo_log_error("failed to query size of processor cores information: error %"PRIu32,
167*2b54f0dbSXin Li (uint32_t) last_error);
168*2b54f0dbSXin Li goto cleanup;
169*2b54f0dbSXin Li }
170*2b54f0dbSXin Li }
171*2b54f0dbSXin Li
172*2b54f0dbSXin Li DWORD packages_info_size = 0;
173*2b54f0dbSXin Li if (GetLogicalProcessorInformationEx(RelationProcessorPackage, NULL, &packages_info_size) == FALSE) {
174*2b54f0dbSXin Li const DWORD last_error = GetLastError();
175*2b54f0dbSXin Li if (last_error != ERROR_INSUFFICIENT_BUFFER) {
176*2b54f0dbSXin Li cpuinfo_log_error("failed to query size of processor packages information: error %"PRIu32,
177*2b54f0dbSXin Li (uint32_t) last_error);
178*2b54f0dbSXin Li goto cleanup;
179*2b54f0dbSXin Li }
180*2b54f0dbSXin Li }
181*2b54f0dbSXin Li
182*2b54f0dbSXin Li DWORD max_info_size = max(cores_info_size, packages_info_size);
183*2b54f0dbSXin Li
184*2b54f0dbSXin Li processor_infos = HeapAlloc(heap, 0, max_info_size);
185*2b54f0dbSXin Li if (processor_infos == NULL) {
186*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %"PRIu32" bytes for logical processor information",
187*2b54f0dbSXin Li (uint32_t) max_info_size);
188*2b54f0dbSXin Li goto cleanup;
189*2b54f0dbSXin Li }
190*2b54f0dbSXin Li
191*2b54f0dbSXin Li if (GetLogicalProcessorInformationEx(RelationProcessorPackage, processor_infos, &max_info_size) == FALSE) {
192*2b54f0dbSXin Li cpuinfo_log_error("failed to query processor packages information: error %"PRIu32,
193*2b54f0dbSXin Li (uint32_t) GetLastError());
194*2b54f0dbSXin Li goto cleanup;
195*2b54f0dbSXin Li }
196*2b54f0dbSXin Li
197*2b54f0dbSXin Li uint32_t packages_count = 0;
198*2b54f0dbSXin Li PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX packages_info_end =
199*2b54f0dbSXin Li (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) ((uintptr_t) processor_infos + packages_info_size);
200*2b54f0dbSXin Li for (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX package_info = processor_infos;
201*2b54f0dbSXin Li package_info < packages_info_end;
202*2b54f0dbSXin Li package_info = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) ((uintptr_t) package_info + package_info->Size))
203*2b54f0dbSXin Li {
204*2b54f0dbSXin Li if (package_info->Relationship != RelationProcessorPackage) {
205*2b54f0dbSXin Li cpuinfo_log_warning("unexpected processor info type (%"PRIu32") for processor package information",
206*2b54f0dbSXin Li (uint32_t) package_info->Relationship);
207*2b54f0dbSXin Li continue;
208*2b54f0dbSXin Li }
209*2b54f0dbSXin Li
210*2b54f0dbSXin Li /* We assume that packages are reported in APIC order */
211*2b54f0dbSXin Li const uint32_t package_id = packages_count++;
212*2b54f0dbSXin Li /* Reconstruct package part of APIC ID */
213*2b54f0dbSXin Li const uint32_t package_apic_id = package_id << package_bits_offset;
214*2b54f0dbSXin Li /* Iterate processor groups and set the package part of APIC ID */
215*2b54f0dbSXin Li for (uint32_t i = 0; i < package_info->Processor.GroupCount; i++) {
216*2b54f0dbSXin Li const uint32_t group_id = package_info->Processor.GroupMask[i].Group;
217*2b54f0dbSXin Li /* Global index of the first logical processor belonging to this group */
218*2b54f0dbSXin Li const uint32_t group_processors_start = processors_before_group[group_id];
219*2b54f0dbSXin Li /* Bitmask representing processors in this group belonging to this package */
220*2b54f0dbSXin Li KAFFINITY group_processors_mask = package_info->Processor.GroupMask[i].Mask;
221*2b54f0dbSXin Li while (group_processors_mask != 0) {
222*2b54f0dbSXin Li const uint32_t group_processor_id = low_index_from_kaffinity(group_processors_mask);
223*2b54f0dbSXin Li const uint32_t processor_id = group_processors_start + group_processor_id;
224*2b54f0dbSXin Li processors[processor_id].package = (const struct cpuinfo_package*) NULL + package_id;
225*2b54f0dbSXin Li processors[processor_id].windows_group_id = (uint16_t) group_id;
226*2b54f0dbSXin Li processors[processor_id].windows_processor_id = (uint16_t) group_processor_id;
227*2b54f0dbSXin Li processors[processor_id].apic_id = package_apic_id;
228*2b54f0dbSXin Li
229*2b54f0dbSXin Li /* Reset the lowest bit in affinity mask */
230*2b54f0dbSXin Li group_processors_mask &= (group_processors_mask - 1);
231*2b54f0dbSXin Li }
232*2b54f0dbSXin Li }
233*2b54f0dbSXin Li }
234*2b54f0dbSXin Li
235*2b54f0dbSXin Li max_info_size = max(cores_info_size, packages_info_size);
236*2b54f0dbSXin Li if (GetLogicalProcessorInformationEx(RelationProcessorCore, processor_infos, &max_info_size) == FALSE) {
237*2b54f0dbSXin Li cpuinfo_log_error("failed to query processor cores information: error %"PRIu32,
238*2b54f0dbSXin Li (uint32_t) GetLastError());
239*2b54f0dbSXin Li goto cleanup;
240*2b54f0dbSXin Li }
241*2b54f0dbSXin Li
242*2b54f0dbSXin Li uint32_t cores_count = 0;
243*2b54f0dbSXin Li /* Index (among all cores) of the the first core on the current package */
244*2b54f0dbSXin Li uint32_t package_core_start = 0;
245*2b54f0dbSXin Li uint32_t current_package_apic_id = 0;
246*2b54f0dbSXin Li PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX cores_info_end =
247*2b54f0dbSXin Li (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) ((uintptr_t) processor_infos + cores_info_size);
248*2b54f0dbSXin Li for (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX core_info = processor_infos;
249*2b54f0dbSXin Li core_info < cores_info_end;
250*2b54f0dbSXin Li core_info = (PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX) ((uintptr_t) core_info + core_info->Size))
251*2b54f0dbSXin Li {
252*2b54f0dbSXin Li if (core_info->Relationship != RelationProcessorCore) {
253*2b54f0dbSXin Li cpuinfo_log_warning("unexpected processor info type (%"PRIu32") for processor core information",
254*2b54f0dbSXin Li (uint32_t) core_info->Relationship);
255*2b54f0dbSXin Li continue;
256*2b54f0dbSXin Li }
257*2b54f0dbSXin Li
258*2b54f0dbSXin Li /* We assume that cores and logical processors are reported in APIC order */
259*2b54f0dbSXin Li const uint32_t core_id = cores_count++;
260*2b54f0dbSXin Li uint32_t smt_id = 0;
261*2b54f0dbSXin Li /* Reconstruct core part of APIC ID */
262*2b54f0dbSXin Li const uint32_t core_apic_id = (core_id & core_bits_mask) << x86_processor.topology.core_bits_offset;
263*2b54f0dbSXin Li /* Iterate processor groups and set the core & SMT parts of APIC ID */
264*2b54f0dbSXin Li for (uint32_t i = 0; i < core_info->Processor.GroupCount; i++) {
265*2b54f0dbSXin Li const uint32_t group_id = core_info->Processor.GroupMask[i].Group;
266*2b54f0dbSXin Li /* Global index of the first logical processor belonging to this group */
267*2b54f0dbSXin Li const uint32_t group_processors_start = processors_before_group[group_id];
268*2b54f0dbSXin Li /* Bitmask representing processors in this group belonging to this package */
269*2b54f0dbSXin Li KAFFINITY group_processors_mask = core_info->Processor.GroupMask[i].Mask;
270*2b54f0dbSXin Li while (group_processors_mask != 0) {
271*2b54f0dbSXin Li const uint32_t group_processor_id = low_index_from_kaffinity(group_processors_mask);
272*2b54f0dbSXin Li const uint32_t processor_id = group_processors_start + group_processor_id;
273*2b54f0dbSXin Li
274*2b54f0dbSXin Li /* Check if this is the first core on a new package */
275*2b54f0dbSXin Li if (processors[processor_id].apic_id != current_package_apic_id) {
276*2b54f0dbSXin Li package_core_start = core_id;
277*2b54f0dbSXin Li current_package_apic_id = processors[processor_id].apic_id;
278*2b54f0dbSXin Li }
279*2b54f0dbSXin Li /* Core ID w.r.t package */
280*2b54f0dbSXin Li const uint32_t package_core_id = core_id - package_core_start;
281*2b54f0dbSXin Li
282*2b54f0dbSXin Li /* Update APIC ID with core and SMT parts */
283*2b54f0dbSXin Li processors[processor_id].apic_id |=
284*2b54f0dbSXin Li ((smt_id & thread_bits_mask) << x86_processor.topology.thread_bits_offset) |
285*2b54f0dbSXin Li ((package_core_id & core_bits_mask) << x86_processor.topology.core_bits_offset);
286*2b54f0dbSXin Li cpuinfo_log_debug("reconstructed APIC ID 0x%08"PRIx32" for processor %"PRIu32" in group %"PRIu32,
287*2b54f0dbSXin Li processors[processor_id].apic_id, group_processor_id, group_id);
288*2b54f0dbSXin Li
289*2b54f0dbSXin Li /* Set SMT ID (assume logical processors within the core are reported in APIC order) */
290*2b54f0dbSXin Li processors[processor_id].smt_id = smt_id++;
291*2b54f0dbSXin Li processors[processor_id].core = (const struct cpuinfo_core*) NULL + core_id;
292*2b54f0dbSXin Li
293*2b54f0dbSXin Li /* Reset the lowest bit in affinity mask */
294*2b54f0dbSXin Li group_processors_mask &= (group_processors_mask - 1);
295*2b54f0dbSXin Li }
296*2b54f0dbSXin Li }
297*2b54f0dbSXin Li }
298*2b54f0dbSXin Li
299*2b54f0dbSXin Li cores = HeapAlloc(heap, HEAP_ZERO_MEMORY, cores_count * sizeof(struct cpuinfo_core));
300*2b54f0dbSXin Li if (cores == NULL) {
301*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores",
302*2b54f0dbSXin Li cores_count * sizeof(struct cpuinfo_core), cores_count);
303*2b54f0dbSXin Li goto cleanup;
304*2b54f0dbSXin Li }
305*2b54f0dbSXin Li
306*2b54f0dbSXin Li clusters = HeapAlloc(heap, HEAP_ZERO_MEMORY, packages_count * sizeof(struct cpuinfo_cluster));
307*2b54f0dbSXin Li if (clusters == NULL) {
308*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" core clusters",
309*2b54f0dbSXin Li packages_count * sizeof(struct cpuinfo_cluster), packages_count);
310*2b54f0dbSXin Li goto cleanup;
311*2b54f0dbSXin Li }
312*2b54f0dbSXin Li
313*2b54f0dbSXin Li packages = HeapAlloc(heap, HEAP_ZERO_MEMORY, packages_count * sizeof(struct cpuinfo_package));
314*2b54f0dbSXin Li if (packages == NULL) {
315*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" physical packages",
316*2b54f0dbSXin Li packages_count * sizeof(struct cpuinfo_package), packages_count);
317*2b54f0dbSXin Li goto cleanup;
318*2b54f0dbSXin Li }
319*2b54f0dbSXin Li
320*2b54f0dbSXin Li for (uint32_t i = processors_count; i != 0; i--) {
321*2b54f0dbSXin Li const uint32_t processor_id = i - 1;
322*2b54f0dbSXin Li struct cpuinfo_processor* processor = processors + processor_id;
323*2b54f0dbSXin Li
324*2b54f0dbSXin Li /* Adjust core and package pointers for all logical processors */
325*2b54f0dbSXin Li struct cpuinfo_core* core =
326*2b54f0dbSXin Li (struct cpuinfo_core*) ((uintptr_t) cores + (uintptr_t) processor->core);
327*2b54f0dbSXin Li processor->core = core;
328*2b54f0dbSXin Li struct cpuinfo_cluster* cluster =
329*2b54f0dbSXin Li (struct cpuinfo_cluster*) ((uintptr_t) clusters + (uintptr_t) processor->cluster);
330*2b54f0dbSXin Li processor->cluster = cluster;
331*2b54f0dbSXin Li struct cpuinfo_package* package =
332*2b54f0dbSXin Li (struct cpuinfo_package*) ((uintptr_t) packages + (uintptr_t) processor->package);
333*2b54f0dbSXin Li processor->package = package;
334*2b54f0dbSXin Li
335*2b54f0dbSXin Li /* This can be overwritten by lower-index processors on the same package */
336*2b54f0dbSXin Li package->processor_start = processor_id;
337*2b54f0dbSXin Li package->processor_count += 1;
338*2b54f0dbSXin Li
339*2b54f0dbSXin Li /* This can be overwritten by lower-index processors on the same cluster */
340*2b54f0dbSXin Li cluster->processor_start = processor_id;
341*2b54f0dbSXin Li cluster->processor_count += 1;
342*2b54f0dbSXin Li
343*2b54f0dbSXin Li /* This can be overwritten by lower-index processors on the same core*/
344*2b54f0dbSXin Li core->processor_start = processor_id;
345*2b54f0dbSXin Li core->processor_count += 1;
346*2b54f0dbSXin Li }
347*2b54f0dbSXin Li
348*2b54f0dbSXin Li /* Set vendor/uarch/CPUID information for cores */
349*2b54f0dbSXin Li for (uint32_t i = cores_count; i != 0; i--) {
350*2b54f0dbSXin Li const uint32_t global_core_id = i - 1;
351*2b54f0dbSXin Li struct cpuinfo_core* core = cores + global_core_id;
352*2b54f0dbSXin Li const struct cpuinfo_processor* processor = processors + core->processor_start;
353*2b54f0dbSXin Li struct cpuinfo_package* package = (struct cpuinfo_package*) processor->package;
354*2b54f0dbSXin Li struct cpuinfo_cluster* cluster = (struct cpuinfo_cluster*) processor->cluster;
355*2b54f0dbSXin Li
356*2b54f0dbSXin Li core->cluster = cluster;
357*2b54f0dbSXin Li core->package = package;
358*2b54f0dbSXin Li core->core_id = core_bits_mask &
359*2b54f0dbSXin Li (processor->apic_id >> x86_processor.topology.core_bits_offset);
360*2b54f0dbSXin Li core->vendor = x86_processor.vendor;
361*2b54f0dbSXin Li core->uarch = x86_processor.uarch;
362*2b54f0dbSXin Li core->cpuid = x86_processor.cpuid;
363*2b54f0dbSXin Li
364*2b54f0dbSXin Li /* This can be overwritten by lower-index cores on the same cluster/package */
365*2b54f0dbSXin Li cluster->core_start = global_core_id;
366*2b54f0dbSXin Li cluster->core_count += 1;
367*2b54f0dbSXin Li package->core_start = global_core_id;
368*2b54f0dbSXin Li package->core_count += 1;
369*2b54f0dbSXin Li }
370*2b54f0dbSXin Li
371*2b54f0dbSXin Li for (uint32_t i = 0; i < packages_count; i++) {
372*2b54f0dbSXin Li struct cpuinfo_package* package = packages + i;
373*2b54f0dbSXin Li struct cpuinfo_cluster* cluster = clusters + i;
374*2b54f0dbSXin Li
375*2b54f0dbSXin Li cluster->package = package;
376*2b54f0dbSXin Li cluster->vendor = cores[cluster->core_start].vendor;
377*2b54f0dbSXin Li cluster->uarch = cores[cluster->core_start].uarch;
378*2b54f0dbSXin Li cluster->cpuid = cores[cluster->core_start].cpuid;
379*2b54f0dbSXin Li package->cluster_start = i;
380*2b54f0dbSXin Li package->cluster_count = 1;
381*2b54f0dbSXin Li cpuinfo_x86_format_package_name(x86_processor.vendor, brand_string, package->name);
382*2b54f0dbSXin Li }
383*2b54f0dbSXin Li
384*2b54f0dbSXin Li /* Count caches */
385*2b54f0dbSXin Li uint32_t l1i_count, l1d_count, l2_count, l3_count, l4_count;
386*2b54f0dbSXin Li cpuinfo_x86_count_caches(processors_count, processors, &x86_processor,
387*2b54f0dbSXin Li &l1i_count, &l1d_count, &l2_count, &l3_count, &l4_count);
388*2b54f0dbSXin Li
389*2b54f0dbSXin Li /* Allocate cache descriptions */
390*2b54f0dbSXin Li if (l1i_count != 0) {
391*2b54f0dbSXin Li l1i = HeapAlloc(heap, HEAP_ZERO_MEMORY, l1i_count * sizeof(struct cpuinfo_cache));
392*2b54f0dbSXin Li if (l1i == NULL) {
393*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches",
394*2b54f0dbSXin Li l1i_count * sizeof(struct cpuinfo_cache), l1i_count);
395*2b54f0dbSXin Li goto cleanup;
396*2b54f0dbSXin Li }
397*2b54f0dbSXin Li }
398*2b54f0dbSXin Li if (l1d_count != 0) {
399*2b54f0dbSXin Li l1d = HeapAlloc(heap, HEAP_ZERO_MEMORY, l1d_count * sizeof(struct cpuinfo_cache));
400*2b54f0dbSXin Li if (l1d == NULL) {
401*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches",
402*2b54f0dbSXin Li l1d_count * sizeof(struct cpuinfo_cache), l1d_count);
403*2b54f0dbSXin Li goto cleanup;
404*2b54f0dbSXin Li }
405*2b54f0dbSXin Li }
406*2b54f0dbSXin Li if (l2_count != 0) {
407*2b54f0dbSXin Li l2 = HeapAlloc(heap, HEAP_ZERO_MEMORY, l2_count * sizeof(struct cpuinfo_cache));
408*2b54f0dbSXin Li if (l2 == NULL) {
409*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches",
410*2b54f0dbSXin Li l2_count * sizeof(struct cpuinfo_cache), l2_count);
411*2b54f0dbSXin Li goto cleanup;
412*2b54f0dbSXin Li }
413*2b54f0dbSXin Li }
414*2b54f0dbSXin Li if (l3_count != 0) {
415*2b54f0dbSXin Li l3 = HeapAlloc(heap, HEAP_ZERO_MEMORY, l3_count * sizeof(struct cpuinfo_cache));
416*2b54f0dbSXin Li if (l3 == NULL) {
417*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L3 caches",
418*2b54f0dbSXin Li l3_count * sizeof(struct cpuinfo_cache), l3_count);
419*2b54f0dbSXin Li goto cleanup;
420*2b54f0dbSXin Li }
421*2b54f0dbSXin Li }
422*2b54f0dbSXin Li if (l4_count != 0) {
423*2b54f0dbSXin Li l4 = HeapAlloc(heap, HEAP_ZERO_MEMORY, l4_count * sizeof(struct cpuinfo_cache));
424*2b54f0dbSXin Li if (l4 == NULL) {
425*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L4 caches",
426*2b54f0dbSXin Li l4_count * sizeof(struct cpuinfo_cache), l4_count);
427*2b54f0dbSXin Li goto cleanup;
428*2b54f0dbSXin Li }
429*2b54f0dbSXin Li }
430*2b54f0dbSXin Li
431*2b54f0dbSXin Li /* Set cache information */
432*2b54f0dbSXin Li uint32_t l1i_index = UINT32_MAX, l1d_index = UINT32_MAX, l2_index = UINT32_MAX, l3_index = UINT32_MAX, l4_index = UINT32_MAX;
433*2b54f0dbSXin Li uint32_t last_l1i_id = UINT32_MAX, last_l1d_id = UINT32_MAX;
434*2b54f0dbSXin Li uint32_t last_l2_id = UINT32_MAX, last_l3_id = UINT32_MAX, last_l4_id = UINT32_MAX;
435*2b54f0dbSXin Li for (uint32_t i = 0; i < processors_count; i++) {
436*2b54f0dbSXin Li const uint32_t apic_id = processors[i].apic_id;
437*2b54f0dbSXin Li
438*2b54f0dbSXin Li if (x86_processor.cache.l1i.size != 0) {
439*2b54f0dbSXin Li const uint32_t l1i_id = apic_id & ~bit_mask(x86_processor.cache.l1i.apic_bits);
440*2b54f0dbSXin Li processors[i].cache.l1i = &l1i[l1i_index];
441*2b54f0dbSXin Li if (l1i_id != last_l1i_id) {
442*2b54f0dbSXin Li /* new cache */
443*2b54f0dbSXin Li last_l1i_id = l1i_id;
444*2b54f0dbSXin Li l1i[++l1i_index] = (struct cpuinfo_cache) {
445*2b54f0dbSXin Li .size = x86_processor.cache.l1i.size,
446*2b54f0dbSXin Li .associativity = x86_processor.cache.l1i.associativity,
447*2b54f0dbSXin Li .sets = x86_processor.cache.l1i.sets,
448*2b54f0dbSXin Li .partitions = x86_processor.cache.l1i.partitions,
449*2b54f0dbSXin Li .line_size = x86_processor.cache.l1i.line_size,
450*2b54f0dbSXin Li .flags = x86_processor.cache.l1i.flags,
451*2b54f0dbSXin Li .processor_start = i,
452*2b54f0dbSXin Li .processor_count = 1,
453*2b54f0dbSXin Li };
454*2b54f0dbSXin Li } else {
455*2b54f0dbSXin Li /* another processor sharing the same cache */
456*2b54f0dbSXin Li l1i[l1i_index].processor_count += 1;
457*2b54f0dbSXin Li }
458*2b54f0dbSXin Li processors[i].cache.l1i = &l1i[l1i_index];
459*2b54f0dbSXin Li } else {
460*2b54f0dbSXin Li /* reset cache id */
461*2b54f0dbSXin Li last_l1i_id = UINT32_MAX;
462*2b54f0dbSXin Li }
463*2b54f0dbSXin Li if (x86_processor.cache.l1d.size != 0) {
464*2b54f0dbSXin Li const uint32_t l1d_id = apic_id & ~bit_mask(x86_processor.cache.l1d.apic_bits);
465*2b54f0dbSXin Li processors[i].cache.l1d = &l1d[l1d_index];
466*2b54f0dbSXin Li if (l1d_id != last_l1d_id) {
467*2b54f0dbSXin Li /* new cache */
468*2b54f0dbSXin Li last_l1d_id = l1d_id;
469*2b54f0dbSXin Li l1d[++l1d_index] = (struct cpuinfo_cache) {
470*2b54f0dbSXin Li .size = x86_processor.cache.l1d.size,
471*2b54f0dbSXin Li .associativity = x86_processor.cache.l1d.associativity,
472*2b54f0dbSXin Li .sets = x86_processor.cache.l1d.sets,
473*2b54f0dbSXin Li .partitions = x86_processor.cache.l1d.partitions,
474*2b54f0dbSXin Li .line_size = x86_processor.cache.l1d.line_size,
475*2b54f0dbSXin Li .flags = x86_processor.cache.l1d.flags,
476*2b54f0dbSXin Li .processor_start = i,
477*2b54f0dbSXin Li .processor_count = 1,
478*2b54f0dbSXin Li };
479*2b54f0dbSXin Li } else {
480*2b54f0dbSXin Li /* another processor sharing the same cache */
481*2b54f0dbSXin Li l1d[l1d_index].processor_count += 1;
482*2b54f0dbSXin Li }
483*2b54f0dbSXin Li processors[i].cache.l1d = &l1d[l1d_index];
484*2b54f0dbSXin Li } else {
485*2b54f0dbSXin Li /* reset cache id */
486*2b54f0dbSXin Li last_l1d_id = UINT32_MAX;
487*2b54f0dbSXin Li }
488*2b54f0dbSXin Li if (x86_processor.cache.l2.size != 0) {
489*2b54f0dbSXin Li const uint32_t l2_id = apic_id & ~bit_mask(x86_processor.cache.l2.apic_bits);
490*2b54f0dbSXin Li processors[i].cache.l2 = &l2[l2_index];
491*2b54f0dbSXin Li if (l2_id != last_l2_id) {
492*2b54f0dbSXin Li /* new cache */
493*2b54f0dbSXin Li last_l2_id = l2_id;
494*2b54f0dbSXin Li l2[++l2_index] = (struct cpuinfo_cache) {
495*2b54f0dbSXin Li .size = x86_processor.cache.l2.size,
496*2b54f0dbSXin Li .associativity = x86_processor.cache.l2.associativity,
497*2b54f0dbSXin Li .sets = x86_processor.cache.l2.sets,
498*2b54f0dbSXin Li .partitions = x86_processor.cache.l2.partitions,
499*2b54f0dbSXin Li .line_size = x86_processor.cache.l2.line_size,
500*2b54f0dbSXin Li .flags = x86_processor.cache.l2.flags,
501*2b54f0dbSXin Li .processor_start = i,
502*2b54f0dbSXin Li .processor_count = 1,
503*2b54f0dbSXin Li };
504*2b54f0dbSXin Li } else {
505*2b54f0dbSXin Li /* another processor sharing the same cache */
506*2b54f0dbSXin Li l2[l2_index].processor_count += 1;
507*2b54f0dbSXin Li }
508*2b54f0dbSXin Li processors[i].cache.l2 = &l2[l2_index];
509*2b54f0dbSXin Li } else {
510*2b54f0dbSXin Li /* reset cache id */
511*2b54f0dbSXin Li last_l2_id = UINT32_MAX;
512*2b54f0dbSXin Li }
513*2b54f0dbSXin Li if (x86_processor.cache.l3.size != 0) {
514*2b54f0dbSXin Li const uint32_t l3_id = apic_id & ~bit_mask(x86_processor.cache.l3.apic_bits);
515*2b54f0dbSXin Li processors[i].cache.l3 = &l3[l3_index];
516*2b54f0dbSXin Li if (l3_id != last_l3_id) {
517*2b54f0dbSXin Li /* new cache */
518*2b54f0dbSXin Li last_l3_id = l3_id;
519*2b54f0dbSXin Li l3[++l3_index] = (struct cpuinfo_cache) {
520*2b54f0dbSXin Li .size = x86_processor.cache.l3.size,
521*2b54f0dbSXin Li .associativity = x86_processor.cache.l3.associativity,
522*2b54f0dbSXin Li .sets = x86_processor.cache.l3.sets,
523*2b54f0dbSXin Li .partitions = x86_processor.cache.l3.partitions,
524*2b54f0dbSXin Li .line_size = x86_processor.cache.l3.line_size,
525*2b54f0dbSXin Li .flags = x86_processor.cache.l3.flags,
526*2b54f0dbSXin Li .processor_start = i,
527*2b54f0dbSXin Li .processor_count = 1,
528*2b54f0dbSXin Li };
529*2b54f0dbSXin Li } else {
530*2b54f0dbSXin Li /* another processor sharing the same cache */
531*2b54f0dbSXin Li l3[l3_index].processor_count += 1;
532*2b54f0dbSXin Li }
533*2b54f0dbSXin Li processors[i].cache.l3 = &l3[l3_index];
534*2b54f0dbSXin Li } else {
535*2b54f0dbSXin Li /* reset cache id */
536*2b54f0dbSXin Li last_l3_id = UINT32_MAX;
537*2b54f0dbSXin Li }
538*2b54f0dbSXin Li if (x86_processor.cache.l4.size != 0) {
539*2b54f0dbSXin Li const uint32_t l4_id = apic_id & ~bit_mask(x86_processor.cache.l4.apic_bits);
540*2b54f0dbSXin Li processors[i].cache.l4 = &l4[l4_index];
541*2b54f0dbSXin Li if (l4_id != last_l4_id) {
542*2b54f0dbSXin Li /* new cache */
543*2b54f0dbSXin Li last_l4_id = l4_id;
544*2b54f0dbSXin Li l4[++l4_index] = (struct cpuinfo_cache) {
545*2b54f0dbSXin Li .size = x86_processor.cache.l4.size,
546*2b54f0dbSXin Li .associativity = x86_processor.cache.l4.associativity,
547*2b54f0dbSXin Li .sets = x86_processor.cache.l4.sets,
548*2b54f0dbSXin Li .partitions = x86_processor.cache.l4.partitions,
549*2b54f0dbSXin Li .line_size = x86_processor.cache.l4.line_size,
550*2b54f0dbSXin Li .flags = x86_processor.cache.l4.flags,
551*2b54f0dbSXin Li .processor_start = i,
552*2b54f0dbSXin Li .processor_count = 1,
553*2b54f0dbSXin Li };
554*2b54f0dbSXin Li } else {
555*2b54f0dbSXin Li /* another processor sharing the same cache */
556*2b54f0dbSXin Li l4[l4_index].processor_count += 1;
557*2b54f0dbSXin Li }
558*2b54f0dbSXin Li processors[i].cache.l4 = &l4[l4_index];
559*2b54f0dbSXin Li } else {
560*2b54f0dbSXin Li /* reset cache id */
561*2b54f0dbSXin Li last_l4_id = UINT32_MAX;
562*2b54f0dbSXin Li }
563*2b54f0dbSXin Li }
564*2b54f0dbSXin Li
565*2b54f0dbSXin Li
566*2b54f0dbSXin Li /* Commit changes */
567*2b54f0dbSXin Li cpuinfo_processors = processors;
568*2b54f0dbSXin Li cpuinfo_cores = cores;
569*2b54f0dbSXin Li cpuinfo_clusters = clusters;
570*2b54f0dbSXin Li cpuinfo_packages = packages;
571*2b54f0dbSXin Li cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
572*2b54f0dbSXin Li cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
573*2b54f0dbSXin Li cpuinfo_cache[cpuinfo_cache_level_2] = l2;
574*2b54f0dbSXin Li cpuinfo_cache[cpuinfo_cache_level_3] = l3;
575*2b54f0dbSXin Li cpuinfo_cache[cpuinfo_cache_level_4] = l4;
576*2b54f0dbSXin Li
577*2b54f0dbSXin Li cpuinfo_processors_count = processors_count;
578*2b54f0dbSXin Li cpuinfo_cores_count = cores_count;
579*2b54f0dbSXin Li cpuinfo_clusters_count = packages_count;
580*2b54f0dbSXin Li cpuinfo_packages_count = packages_count;
581*2b54f0dbSXin Li cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1i_count;
582*2b54f0dbSXin Li cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1d_count;
583*2b54f0dbSXin Li cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
584*2b54f0dbSXin Li cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
585*2b54f0dbSXin Li cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count;
586*2b54f0dbSXin Li cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
587*2b54f0dbSXin Li
588*2b54f0dbSXin Li cpuinfo_global_uarch = (struct cpuinfo_uarch_info) {
589*2b54f0dbSXin Li .uarch = x86_processor.uarch,
590*2b54f0dbSXin Li .cpuid = x86_processor.cpuid,
591*2b54f0dbSXin Li .processor_count = processors_count,
592*2b54f0dbSXin Li .core_count = cores_count,
593*2b54f0dbSXin Li };
594*2b54f0dbSXin Li
595*2b54f0dbSXin Li MemoryBarrier();
596*2b54f0dbSXin Li
597*2b54f0dbSXin Li cpuinfo_is_initialized = true;
598*2b54f0dbSXin Li
599*2b54f0dbSXin Li processors = NULL;
600*2b54f0dbSXin Li cores = NULL;
601*2b54f0dbSXin Li clusters = NULL;
602*2b54f0dbSXin Li packages = NULL;
603*2b54f0dbSXin Li l1i = l1d = l2 = l3 = l4 = NULL;
604*2b54f0dbSXin Li
605*2b54f0dbSXin Li cleanup:
606*2b54f0dbSXin Li if (processors != NULL) {
607*2b54f0dbSXin Li HeapFree(heap, 0, processors);
608*2b54f0dbSXin Li }
609*2b54f0dbSXin Li if (cores != NULL) {
610*2b54f0dbSXin Li HeapFree(heap, 0, cores);
611*2b54f0dbSXin Li }
612*2b54f0dbSXin Li if (clusters != NULL) {
613*2b54f0dbSXin Li HeapFree(heap, 0, clusters);
614*2b54f0dbSXin Li }
615*2b54f0dbSXin Li if (packages != NULL) {
616*2b54f0dbSXin Li HeapFree(heap, 0, packages);
617*2b54f0dbSXin Li }
618*2b54f0dbSXin Li if (l1i != NULL) {
619*2b54f0dbSXin Li HeapFree(heap, 0, l1i);
620*2b54f0dbSXin Li }
621*2b54f0dbSXin Li if (l1d != NULL) {
622*2b54f0dbSXin Li HeapFree(heap, 0, l1d);
623*2b54f0dbSXin Li }
624*2b54f0dbSXin Li if (l2 != NULL) {
625*2b54f0dbSXin Li HeapFree(heap, 0, l2);
626*2b54f0dbSXin Li }
627*2b54f0dbSXin Li if (l3 != NULL) {
628*2b54f0dbSXin Li HeapFree(heap, 0, l3);
629*2b54f0dbSXin Li }
630*2b54f0dbSXin Li if (l4 != NULL) {
631*2b54f0dbSXin Li HeapFree(heap, 0, l4);
632*2b54f0dbSXin Li }
633*2b54f0dbSXin Li return TRUE;
634*2b54f0dbSXin Li }
635