1*2b54f0dbSXin Li #include <stdint.h>
2*2b54f0dbSXin Li #include <stddef.h>
3*2b54f0dbSXin Li #include <stdlib.h>
4*2b54f0dbSXin Li #include <string.h>
5*2b54f0dbSXin Li
6*2b54f0dbSXin Li #include <cpuinfo.h>
7*2b54f0dbSXin Li #include <x86/api.h>
8*2b54f0dbSXin Li #include <x86/linux/api.h>
9*2b54f0dbSXin Li #include <linux/api.h>
10*2b54f0dbSXin Li #include <cpuinfo/internal-api.h>
11*2b54f0dbSXin Li #include <cpuinfo/log.h>
12*2b54f0dbSXin Li
13*2b54f0dbSXin Li
/*
 * Build a mask with the `bits` least-significant bits set.
 *
 * Shifting a 32-bit value by 32 or more positions is undefined behavior in C,
 * so any width of 32 or above saturates to an all-ones mask instead.
 *
 * @param bits  Number of low-order bits to set.
 * @return      (2^bits - 1) for bits < 32, UINT32_MAX otherwise.
 */
static inline uint32_t bit_mask(uint32_t bits) {
	if (bits >= 32) {
		return UINT32_MAX;
	}
	return (UINT32_C(1) << bits) - UINT32_C(1);
}
17*2b54f0dbSXin Li
/*
 * Report whether every bit selected by `mask` is also set in `bitfield`.
 * An empty mask is trivially satisfied.
 */
static inline bool bitmask_all(uint32_t bitfield, uint32_t mask) {
	/* Bits requested by the mask that the bitfield does not provide */
	const uint32_t missing_bits = mask & ~bitfield;
	return missing_bits == 0;
}
21*2b54f0dbSXin Li
/* Return the smaller of two unsigned 32-bit values. */
static inline uint32_t min(uint32_t a, uint32_t b) {
	if (b < a) {
		return b;
	}
	return a;
}
25*2b54f0dbSXin Li
/*
 * Three-way comparison of two unsigned 32-bit values.
 * Returns -1 when a < b, 0 when a == b, and 1 when a > b.
 */
static inline int cmp(uint32_t a, uint32_t b) {
	if (a < b) {
		return -1;
	}
	if (a > b) {
		return 1;
	}
	return 0;
}
29*2b54f0dbSXin Li
cmp_x86_linux_processor(const void * ptr_a,const void * ptr_b)30*2b54f0dbSXin Li static int cmp_x86_linux_processor(const void* ptr_a, const void* ptr_b) {
31*2b54f0dbSXin Li const struct cpuinfo_x86_linux_processor* processor_a = (const struct cpuinfo_x86_linux_processor*) ptr_a;
32*2b54f0dbSXin Li const struct cpuinfo_x86_linux_processor* processor_b = (const struct cpuinfo_x86_linux_processor*) ptr_b;
33*2b54f0dbSXin Li
34*2b54f0dbSXin Li /* Move usable processors towards the start of the array */
35*2b54f0dbSXin Li const bool usable_a = bitmask_all(processor_a->flags, CPUINFO_LINUX_FLAG_VALID);
36*2b54f0dbSXin Li const bool usable_b = bitmask_all(processor_b->flags, CPUINFO_LINUX_FLAG_VALID);
37*2b54f0dbSXin Li if (usable_a != usable_b) {
38*2b54f0dbSXin Li return (int) usable_b - (int) usable_a;
39*2b54f0dbSXin Li }
40*2b54f0dbSXin Li
41*2b54f0dbSXin Li /* Compare based on APIC ID (i.e. processor 0 < processor 1) */
42*2b54f0dbSXin Li const uint32_t id_a = processor_a->apic_id;
43*2b54f0dbSXin Li const uint32_t id_b = processor_b->apic_id;
44*2b54f0dbSXin Li return cmp(id_a, id_b);
45*2b54f0dbSXin Li }
46*2b54f0dbSXin Li
cpuinfo_x86_count_objects(uint32_t linux_processors_count,const struct cpuinfo_x86_linux_processor linux_processors[restrict static linux_processors_count],const struct cpuinfo_x86_processor processor[restrict static1],uint32_t valid_processor_mask,uint32_t llc_apic_bits,uint32_t cores_count_ptr[restrict static1],uint32_t clusters_count_ptr[restrict static1],uint32_t packages_count_ptr[restrict static1],uint32_t l1i_count_ptr[restrict static1],uint32_t l1d_count_ptr[restrict static1],uint32_t l2_count_ptr[restrict static1],uint32_t l3_count_ptr[restrict static1],uint32_t l4_count_ptr[restrict static1])47*2b54f0dbSXin Li static void cpuinfo_x86_count_objects(
48*2b54f0dbSXin Li uint32_t linux_processors_count,
49*2b54f0dbSXin Li const struct cpuinfo_x86_linux_processor linux_processors[restrict static linux_processors_count],
50*2b54f0dbSXin Li const struct cpuinfo_x86_processor processor[restrict static 1],
51*2b54f0dbSXin Li uint32_t valid_processor_mask,
52*2b54f0dbSXin Li uint32_t llc_apic_bits,
53*2b54f0dbSXin Li uint32_t cores_count_ptr[restrict static 1],
54*2b54f0dbSXin Li uint32_t clusters_count_ptr[restrict static 1],
55*2b54f0dbSXin Li uint32_t packages_count_ptr[restrict static 1],
56*2b54f0dbSXin Li uint32_t l1i_count_ptr[restrict static 1],
57*2b54f0dbSXin Li uint32_t l1d_count_ptr[restrict static 1],
58*2b54f0dbSXin Li uint32_t l2_count_ptr[restrict static 1],
59*2b54f0dbSXin Li uint32_t l3_count_ptr[restrict static 1],
60*2b54f0dbSXin Li uint32_t l4_count_ptr[restrict static 1])
61*2b54f0dbSXin Li {
62*2b54f0dbSXin Li const uint32_t core_apic_mask =
63*2b54f0dbSXin Li ~(bit_mask(processor->topology.thread_bits_length) << processor->topology.thread_bits_offset);
64*2b54f0dbSXin Li const uint32_t package_apic_mask =
65*2b54f0dbSXin Li core_apic_mask & ~(bit_mask(processor->topology.core_bits_length) << processor->topology.core_bits_offset);
66*2b54f0dbSXin Li const uint32_t llc_apic_mask = ~bit_mask(llc_apic_bits);
67*2b54f0dbSXin Li const uint32_t cluster_apic_mask = package_apic_mask | llc_apic_mask;
68*2b54f0dbSXin Li
69*2b54f0dbSXin Li uint32_t cores_count = 0, clusters_count = 0, packages_count = 0;
70*2b54f0dbSXin Li uint32_t l1i_count = 0, l1d_count = 0, l2_count = 0, l3_count = 0, l4_count = 0;
71*2b54f0dbSXin Li uint32_t last_core_id = UINT32_MAX, last_cluster_id = UINT32_MAX, last_package_id = UINT32_MAX;
72*2b54f0dbSXin Li uint32_t last_l1i_id = UINT32_MAX, last_l1d_id = UINT32_MAX;
73*2b54f0dbSXin Li uint32_t last_l2_id = UINT32_MAX, last_l3_id = UINT32_MAX, last_l4_id = UINT32_MAX;
74*2b54f0dbSXin Li for (uint32_t i = 0; i < linux_processors_count; i++) {
75*2b54f0dbSXin Li if (bitmask_all(linux_processors[i].flags, valid_processor_mask)) {
76*2b54f0dbSXin Li const uint32_t apic_id = linux_processors[i].apic_id;
77*2b54f0dbSXin Li cpuinfo_log_debug("APID ID %"PRIu32": system processor %"PRIu32, apic_id, linux_processors[i].linux_id);
78*2b54f0dbSXin Li
79*2b54f0dbSXin Li /* All bits of APIC ID except thread ID mask */
80*2b54f0dbSXin Li const uint32_t core_id = apic_id & core_apic_mask;
81*2b54f0dbSXin Li if (core_id != last_core_id) {
82*2b54f0dbSXin Li last_core_id = core_id;
83*2b54f0dbSXin Li cores_count++;
84*2b54f0dbSXin Li }
85*2b54f0dbSXin Li /* All bits of APIC ID except thread ID and core ID masks */
86*2b54f0dbSXin Li const uint32_t package_id = apic_id & package_apic_mask;
87*2b54f0dbSXin Li if (package_id != last_package_id) {
88*2b54f0dbSXin Li last_package_id = package_id;
89*2b54f0dbSXin Li packages_count++;
90*2b54f0dbSXin Li }
91*2b54f0dbSXin Li /* Bits of APIC ID which are part of either LLC or package ID mask */
92*2b54f0dbSXin Li const uint32_t cluster_id = apic_id & cluster_apic_mask;
93*2b54f0dbSXin Li if (cluster_id != last_cluster_id) {
94*2b54f0dbSXin Li last_cluster_id = cluster_id;
95*2b54f0dbSXin Li clusters_count++;
96*2b54f0dbSXin Li }
97*2b54f0dbSXin Li if (processor->cache.l1i.size != 0) {
98*2b54f0dbSXin Li const uint32_t l1i_id = apic_id & ~bit_mask(processor->cache.l1i.apic_bits);
99*2b54f0dbSXin Li if (l1i_id != last_l1i_id) {
100*2b54f0dbSXin Li last_l1i_id = l1i_id;
101*2b54f0dbSXin Li l1i_count++;
102*2b54f0dbSXin Li }
103*2b54f0dbSXin Li }
104*2b54f0dbSXin Li if (processor->cache.l1d.size != 0) {
105*2b54f0dbSXin Li const uint32_t l1d_id = apic_id & ~bit_mask(processor->cache.l1d.apic_bits);
106*2b54f0dbSXin Li if (l1d_id != last_l1d_id) {
107*2b54f0dbSXin Li last_l1d_id = l1d_id;
108*2b54f0dbSXin Li l1d_count++;
109*2b54f0dbSXin Li }
110*2b54f0dbSXin Li }
111*2b54f0dbSXin Li if (processor->cache.l2.size != 0) {
112*2b54f0dbSXin Li const uint32_t l2_id = apic_id & ~bit_mask(processor->cache.l2.apic_bits);
113*2b54f0dbSXin Li if (l2_id != last_l2_id) {
114*2b54f0dbSXin Li last_l2_id = l2_id;
115*2b54f0dbSXin Li l2_count++;
116*2b54f0dbSXin Li }
117*2b54f0dbSXin Li }
118*2b54f0dbSXin Li if (processor->cache.l3.size != 0) {
119*2b54f0dbSXin Li const uint32_t l3_id = apic_id & ~bit_mask(processor->cache.l3.apic_bits);
120*2b54f0dbSXin Li if (l3_id != last_l3_id) {
121*2b54f0dbSXin Li last_l3_id = l3_id;
122*2b54f0dbSXin Li l3_count++;
123*2b54f0dbSXin Li }
124*2b54f0dbSXin Li }
125*2b54f0dbSXin Li if (processor->cache.l4.size != 0) {
126*2b54f0dbSXin Li const uint32_t l4_id = apic_id & ~bit_mask(processor->cache.l4.apic_bits);
127*2b54f0dbSXin Li if (l4_id != last_l4_id) {
128*2b54f0dbSXin Li last_l4_id = l4_id;
129*2b54f0dbSXin Li l4_count++;
130*2b54f0dbSXin Li }
131*2b54f0dbSXin Li }
132*2b54f0dbSXin Li }
133*2b54f0dbSXin Li }
134*2b54f0dbSXin Li *cores_count_ptr = cores_count;
135*2b54f0dbSXin Li *clusters_count_ptr = clusters_count;
136*2b54f0dbSXin Li *packages_count_ptr = packages_count;
137*2b54f0dbSXin Li *l1i_count_ptr = l1i_count;
138*2b54f0dbSXin Li *l1d_count_ptr = l1d_count;
139*2b54f0dbSXin Li *l2_count_ptr = l2_count;
140*2b54f0dbSXin Li *l3_count_ptr = l3_count;
141*2b54f0dbSXin Li *l4_count_ptr = l4_count;
142*2b54f0dbSXin Li }
143*2b54f0dbSXin Li
cpuinfo_x86_linux_init(void)144*2b54f0dbSXin Li void cpuinfo_x86_linux_init(void) {
145*2b54f0dbSXin Li struct cpuinfo_x86_linux_processor* x86_linux_processors = NULL;
146*2b54f0dbSXin Li struct cpuinfo_processor* processors = NULL;
147*2b54f0dbSXin Li struct cpuinfo_core* cores = NULL;
148*2b54f0dbSXin Li struct cpuinfo_cluster* clusters = NULL;
149*2b54f0dbSXin Li struct cpuinfo_package* packages = NULL;
150*2b54f0dbSXin Li const struct cpuinfo_processor** linux_cpu_to_processor_map = NULL;
151*2b54f0dbSXin Li const struct cpuinfo_core** linux_cpu_to_core_map = NULL;
152*2b54f0dbSXin Li struct cpuinfo_cache* l1i = NULL;
153*2b54f0dbSXin Li struct cpuinfo_cache* l1d = NULL;
154*2b54f0dbSXin Li struct cpuinfo_cache* l2 = NULL;
155*2b54f0dbSXin Li struct cpuinfo_cache* l3 = NULL;
156*2b54f0dbSXin Li struct cpuinfo_cache* l4 = NULL;
157*2b54f0dbSXin Li
158*2b54f0dbSXin Li const uint32_t max_processors_count = cpuinfo_linux_get_max_processors_count();
159*2b54f0dbSXin Li cpuinfo_log_debug("system maximum processors count: %"PRIu32, max_processors_count);
160*2b54f0dbSXin Li
161*2b54f0dbSXin Li const uint32_t max_possible_processors_count = 1 +
162*2b54f0dbSXin Li cpuinfo_linux_get_max_possible_processor(max_processors_count);
163*2b54f0dbSXin Li cpuinfo_log_debug("maximum possible processors count: %"PRIu32, max_possible_processors_count);
164*2b54f0dbSXin Li const uint32_t max_present_processors_count = 1 +
165*2b54f0dbSXin Li cpuinfo_linux_get_max_present_processor(max_processors_count);
166*2b54f0dbSXin Li cpuinfo_log_debug("maximum present processors count: %"PRIu32, max_present_processors_count);
167*2b54f0dbSXin Li
168*2b54f0dbSXin Li uint32_t valid_processor_mask = 0;
169*2b54f0dbSXin Li uint32_t x86_linux_processors_count = max_processors_count;
170*2b54f0dbSXin Li if (max_present_processors_count != 0) {
171*2b54f0dbSXin Li x86_linux_processors_count = min(x86_linux_processors_count, max_present_processors_count);
172*2b54f0dbSXin Li valid_processor_mask = CPUINFO_LINUX_FLAG_PRESENT;
173*2b54f0dbSXin Li } else {
174*2b54f0dbSXin Li valid_processor_mask = CPUINFO_LINUX_FLAG_PROC_CPUINFO;
175*2b54f0dbSXin Li }
176*2b54f0dbSXin Li if (max_possible_processors_count != 0) {
177*2b54f0dbSXin Li x86_linux_processors_count = min(x86_linux_processors_count, max_possible_processors_count);
178*2b54f0dbSXin Li valid_processor_mask |= CPUINFO_LINUX_FLAG_POSSIBLE;
179*2b54f0dbSXin Li }
180*2b54f0dbSXin Li
181*2b54f0dbSXin Li x86_linux_processors = calloc(x86_linux_processors_count, sizeof(struct cpuinfo_x86_linux_processor));
182*2b54f0dbSXin Li if (x86_linux_processors == NULL) {
183*2b54f0dbSXin Li cpuinfo_log_error(
184*2b54f0dbSXin Li "failed to allocate %zu bytes for descriptions of %"PRIu32" x86 logical processors",
185*2b54f0dbSXin Li x86_linux_processors_count * sizeof(struct cpuinfo_x86_linux_processor),
186*2b54f0dbSXin Li x86_linux_processors_count);
187*2b54f0dbSXin Li return;
188*2b54f0dbSXin Li }
189*2b54f0dbSXin Li
190*2b54f0dbSXin Li if (max_possible_processors_count != 0) {
191*2b54f0dbSXin Li cpuinfo_linux_detect_possible_processors(
192*2b54f0dbSXin Li x86_linux_processors_count, &x86_linux_processors->flags,
193*2b54f0dbSXin Li sizeof(struct cpuinfo_x86_linux_processor),
194*2b54f0dbSXin Li CPUINFO_LINUX_FLAG_POSSIBLE);
195*2b54f0dbSXin Li }
196*2b54f0dbSXin Li
197*2b54f0dbSXin Li if (max_present_processors_count != 0) {
198*2b54f0dbSXin Li cpuinfo_linux_detect_present_processors(
199*2b54f0dbSXin Li x86_linux_processors_count, &x86_linux_processors->flags,
200*2b54f0dbSXin Li sizeof(struct cpuinfo_x86_linux_processor),
201*2b54f0dbSXin Li CPUINFO_LINUX_FLAG_PRESENT);
202*2b54f0dbSXin Li }
203*2b54f0dbSXin Li
204*2b54f0dbSXin Li if (!cpuinfo_x86_linux_parse_proc_cpuinfo(x86_linux_processors_count, x86_linux_processors)) {
205*2b54f0dbSXin Li cpuinfo_log_error("failed to parse processor information from /proc/cpuinfo");
206*2b54f0dbSXin Li return;
207*2b54f0dbSXin Li }
208*2b54f0dbSXin Li
209*2b54f0dbSXin Li for (uint32_t i = 0; i < x86_linux_processors_count; i++) {
210*2b54f0dbSXin Li if (bitmask_all(x86_linux_processors[i].flags, valid_processor_mask)) {
211*2b54f0dbSXin Li x86_linux_processors[i].flags |= CPUINFO_LINUX_FLAG_VALID;
212*2b54f0dbSXin Li }
213*2b54f0dbSXin Li }
214*2b54f0dbSXin Li
215*2b54f0dbSXin Li struct cpuinfo_x86_processor x86_processor;
216*2b54f0dbSXin Li memset(&x86_processor, 0, sizeof(x86_processor));
217*2b54f0dbSXin Li cpuinfo_x86_init_processor(&x86_processor);
218*2b54f0dbSXin Li char brand_string[48];
219*2b54f0dbSXin Li cpuinfo_x86_normalize_brand_string(x86_processor.brand_string, brand_string);
220*2b54f0dbSXin Li
221*2b54f0dbSXin Li uint32_t processors_count = 0;
222*2b54f0dbSXin Li for (uint32_t i = 0; i < x86_linux_processors_count; i++) {
223*2b54f0dbSXin Li if (bitmask_all(x86_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
224*2b54f0dbSXin Li x86_linux_processors[i].linux_id = i;
225*2b54f0dbSXin Li processors_count++;
226*2b54f0dbSXin Li }
227*2b54f0dbSXin Li }
228*2b54f0dbSXin Li
229*2b54f0dbSXin Li qsort(x86_linux_processors, x86_linux_processors_count, sizeof(struct cpuinfo_x86_linux_processor),
230*2b54f0dbSXin Li cmp_x86_linux_processor);
231*2b54f0dbSXin Li
232*2b54f0dbSXin Li processors = calloc(processors_count, sizeof(struct cpuinfo_processor));
233*2b54f0dbSXin Li if (processors == NULL) {
234*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" logical processors",
235*2b54f0dbSXin Li processors_count * sizeof(struct cpuinfo_processor), processors_count);
236*2b54f0dbSXin Li goto cleanup;
237*2b54f0dbSXin Li }
238*2b54f0dbSXin Li
239*2b54f0dbSXin Li uint32_t llc_apic_bits = 0;
240*2b54f0dbSXin Li if (x86_processor.cache.l4.size != 0) {
241*2b54f0dbSXin Li llc_apic_bits = x86_processor.cache.l4.apic_bits;
242*2b54f0dbSXin Li } else if (x86_processor.cache.l3.size != 0) {
243*2b54f0dbSXin Li llc_apic_bits = x86_processor.cache.l3.apic_bits;
244*2b54f0dbSXin Li } else if (x86_processor.cache.l2.size != 0) {
245*2b54f0dbSXin Li llc_apic_bits = x86_processor.cache.l2.apic_bits;
246*2b54f0dbSXin Li } else if (x86_processor.cache.l1d.size != 0) {
247*2b54f0dbSXin Li llc_apic_bits = x86_processor.cache.l1d.apic_bits;
248*2b54f0dbSXin Li }
249*2b54f0dbSXin Li uint32_t packages_count = 0, clusters_count = 0, cores_count = 0;
250*2b54f0dbSXin Li uint32_t l1i_count = 0, l1d_count = 0, l2_count = 0, l3_count = 0, l4_count = 0;
251*2b54f0dbSXin Li cpuinfo_x86_count_objects(
252*2b54f0dbSXin Li x86_linux_processors_count, x86_linux_processors, &x86_processor, valid_processor_mask, llc_apic_bits,
253*2b54f0dbSXin Li &cores_count, &clusters_count, &packages_count, &l1i_count, &l1d_count, &l2_count, &l3_count, &l4_count);
254*2b54f0dbSXin Li
255*2b54f0dbSXin Li cpuinfo_log_debug("detected %"PRIu32" cores", cores_count);
256*2b54f0dbSXin Li cpuinfo_log_debug("detected %"PRIu32" clusters", clusters_count);
257*2b54f0dbSXin Li cpuinfo_log_debug("detected %"PRIu32" packages", packages_count);
258*2b54f0dbSXin Li cpuinfo_log_debug("detected %"PRIu32" L1I caches", l1i_count);
259*2b54f0dbSXin Li cpuinfo_log_debug("detected %"PRIu32" L1D caches", l1d_count);
260*2b54f0dbSXin Li cpuinfo_log_debug("detected %"PRIu32" L2 caches", l2_count);
261*2b54f0dbSXin Li cpuinfo_log_debug("detected %"PRIu32" L3 caches", l3_count);
262*2b54f0dbSXin Li cpuinfo_log_debug("detected %"PRIu32" L4 caches", l4_count);
263*2b54f0dbSXin Li
264*2b54f0dbSXin Li linux_cpu_to_processor_map = calloc(x86_linux_processors_count, sizeof(struct cpuinfo_processor*));
265*2b54f0dbSXin Li if (linux_cpu_to_processor_map == NULL) {
266*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for mapping entries of %"PRIu32" logical processors",
267*2b54f0dbSXin Li x86_linux_processors_count * sizeof(struct cpuinfo_processor*),
268*2b54f0dbSXin Li x86_linux_processors_count);
269*2b54f0dbSXin Li goto cleanup;
270*2b54f0dbSXin Li }
271*2b54f0dbSXin Li
272*2b54f0dbSXin Li linux_cpu_to_core_map = calloc(x86_linux_processors_count, sizeof(struct cpuinfo_core*));
273*2b54f0dbSXin Li if (linux_cpu_to_core_map == NULL) {
274*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for mapping entries of %"PRIu32" cores",
275*2b54f0dbSXin Li x86_linux_processors_count * sizeof(struct cpuinfo_core*),
276*2b54f0dbSXin Li x86_linux_processors_count);
277*2b54f0dbSXin Li goto cleanup;
278*2b54f0dbSXin Li }
279*2b54f0dbSXin Li
280*2b54f0dbSXin Li cores = calloc(cores_count, sizeof(struct cpuinfo_core));
281*2b54f0dbSXin Li if (cores == NULL) {
282*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" cores",
283*2b54f0dbSXin Li cores_count * sizeof(struct cpuinfo_core), cores_count);
284*2b54f0dbSXin Li goto cleanup;
285*2b54f0dbSXin Li }
286*2b54f0dbSXin Li
287*2b54f0dbSXin Li clusters = calloc(clusters_count, sizeof(struct cpuinfo_cluster));
288*2b54f0dbSXin Li if (clusters == NULL) {
289*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" core clusters",
290*2b54f0dbSXin Li clusters_count * sizeof(struct cpuinfo_cluster), clusters_count);
291*2b54f0dbSXin Li goto cleanup;
292*2b54f0dbSXin Li }
293*2b54f0dbSXin Li
294*2b54f0dbSXin Li packages = calloc(packages_count, sizeof(struct cpuinfo_package));
295*2b54f0dbSXin Li if (packages == NULL) {
296*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" physical packages",
297*2b54f0dbSXin Li packages_count * sizeof(struct cpuinfo_package), packages_count);
298*2b54f0dbSXin Li goto cleanup;
299*2b54f0dbSXin Li }
300*2b54f0dbSXin Li
301*2b54f0dbSXin Li if (l1i_count != 0) {
302*2b54f0dbSXin Li l1i = calloc(l1i_count, sizeof(struct cpuinfo_cache));
303*2b54f0dbSXin Li if (l1i == NULL) {
304*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1I caches",
305*2b54f0dbSXin Li l1i_count * sizeof(struct cpuinfo_cache), l1i_count);
306*2b54f0dbSXin Li goto cleanup;
307*2b54f0dbSXin Li }
308*2b54f0dbSXin Li }
309*2b54f0dbSXin Li if (l1d_count != 0) {
310*2b54f0dbSXin Li l1d = calloc(l1d_count, sizeof(struct cpuinfo_cache));
311*2b54f0dbSXin Li if (l1d == NULL) {
312*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L1D caches",
313*2b54f0dbSXin Li l1d_count * sizeof(struct cpuinfo_cache), l1d_count);
314*2b54f0dbSXin Li goto cleanup;
315*2b54f0dbSXin Li }
316*2b54f0dbSXin Li }
317*2b54f0dbSXin Li if (l2_count != 0) {
318*2b54f0dbSXin Li l2 = calloc(l2_count, sizeof(struct cpuinfo_cache));
319*2b54f0dbSXin Li if (l2 == NULL) {
320*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L2 caches",
321*2b54f0dbSXin Li l2_count * sizeof(struct cpuinfo_cache), l2_count);
322*2b54f0dbSXin Li goto cleanup;
323*2b54f0dbSXin Li }
324*2b54f0dbSXin Li }
325*2b54f0dbSXin Li if (l3_count != 0) {
326*2b54f0dbSXin Li l3 = calloc(l3_count, sizeof(struct cpuinfo_cache));
327*2b54f0dbSXin Li if (l3 == NULL) {
328*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L3 caches",
329*2b54f0dbSXin Li l3_count * sizeof(struct cpuinfo_cache), l3_count);
330*2b54f0dbSXin Li goto cleanup;
331*2b54f0dbSXin Li }
332*2b54f0dbSXin Li }
333*2b54f0dbSXin Li if (l4_count != 0) {
334*2b54f0dbSXin Li l4 = calloc(l4_count, sizeof(struct cpuinfo_cache));
335*2b54f0dbSXin Li if (l4 == NULL) {
336*2b54f0dbSXin Li cpuinfo_log_error("failed to allocate %zu bytes for descriptions of %"PRIu32" L4 caches",
337*2b54f0dbSXin Li l4_count * sizeof(struct cpuinfo_cache), l4_count);
338*2b54f0dbSXin Li goto cleanup;
339*2b54f0dbSXin Li }
340*2b54f0dbSXin Li }
341*2b54f0dbSXin Li
342*2b54f0dbSXin Li const uint32_t core_apic_mask =
343*2b54f0dbSXin Li ~(bit_mask(x86_processor.topology.thread_bits_length) << x86_processor.topology.thread_bits_offset);
344*2b54f0dbSXin Li const uint32_t package_apic_mask =
345*2b54f0dbSXin Li core_apic_mask & ~(bit_mask(x86_processor.topology.core_bits_length) << x86_processor.topology.core_bits_offset);
346*2b54f0dbSXin Li const uint32_t llc_apic_mask = ~bit_mask(llc_apic_bits);
347*2b54f0dbSXin Li const uint32_t cluster_apic_mask = package_apic_mask | llc_apic_mask;
348*2b54f0dbSXin Li
349*2b54f0dbSXin Li uint32_t processor_index = UINT32_MAX, core_index = UINT32_MAX, cluster_index = UINT32_MAX, package_index = UINT32_MAX;
350*2b54f0dbSXin Li uint32_t l1i_index = UINT32_MAX, l1d_index = UINT32_MAX, l2_index = UINT32_MAX, l3_index = UINT32_MAX, l4_index = UINT32_MAX;
351*2b54f0dbSXin Li uint32_t cluster_id = 0, core_id = 0, smt_id = 0;
352*2b54f0dbSXin Li uint32_t last_apic_core_id = UINT32_MAX, last_apic_cluster_id = UINT32_MAX, last_apic_package_id = UINT32_MAX;
353*2b54f0dbSXin Li uint32_t last_l1i_id = UINT32_MAX, last_l1d_id = UINT32_MAX;
354*2b54f0dbSXin Li uint32_t last_l2_id = UINT32_MAX, last_l3_id = UINT32_MAX, last_l4_id = UINT32_MAX;
355*2b54f0dbSXin Li for (uint32_t i = 0; i < x86_linux_processors_count; i++) {
356*2b54f0dbSXin Li if (bitmask_all(x86_linux_processors[i].flags, CPUINFO_LINUX_FLAG_VALID)) {
357*2b54f0dbSXin Li const uint32_t apic_id = x86_linux_processors[i].apic_id;
358*2b54f0dbSXin Li processor_index++;
359*2b54f0dbSXin Li smt_id++;
360*2b54f0dbSXin Li
361*2b54f0dbSXin Li /* All bits of APIC ID except thread ID mask */
362*2b54f0dbSXin Li const uint32_t apid_core_id = apic_id & core_apic_mask;
363*2b54f0dbSXin Li if (apid_core_id != last_apic_core_id) {
364*2b54f0dbSXin Li core_index++;
365*2b54f0dbSXin Li core_id++;
366*2b54f0dbSXin Li smt_id = 0;
367*2b54f0dbSXin Li }
368*2b54f0dbSXin Li /* Bits of APIC ID which are part of either LLC or package ID mask */
369*2b54f0dbSXin Li const uint32_t apic_cluster_id = apic_id & cluster_apic_mask;
370*2b54f0dbSXin Li if (apic_cluster_id != last_apic_cluster_id) {
371*2b54f0dbSXin Li cluster_index++;
372*2b54f0dbSXin Li cluster_id++;
373*2b54f0dbSXin Li }
374*2b54f0dbSXin Li /* All bits of APIC ID except thread ID and core ID masks */
375*2b54f0dbSXin Li const uint32_t apic_package_id = apic_id & package_apic_mask;
376*2b54f0dbSXin Li if (apic_package_id != last_apic_package_id) {
377*2b54f0dbSXin Li package_index++;
378*2b54f0dbSXin Li core_id = 0;
379*2b54f0dbSXin Li cluster_id = 0;
380*2b54f0dbSXin Li }
381*2b54f0dbSXin Li
382*2b54f0dbSXin Li /* Initialize logical processor object */
383*2b54f0dbSXin Li processors[processor_index].smt_id = smt_id;
384*2b54f0dbSXin Li processors[processor_index].core = cores + core_index;
385*2b54f0dbSXin Li processors[processor_index].cluster = clusters + cluster_index;
386*2b54f0dbSXin Li processors[processor_index].package = packages + package_index;
387*2b54f0dbSXin Li processors[processor_index].linux_id = x86_linux_processors[i].linux_id;
388*2b54f0dbSXin Li processors[processor_index].apic_id = x86_linux_processors[i].apic_id;
389*2b54f0dbSXin Li
390*2b54f0dbSXin Li if (apid_core_id != last_apic_core_id) {
391*2b54f0dbSXin Li /* new core */
392*2b54f0dbSXin Li cores[core_index] = (struct cpuinfo_core) {
393*2b54f0dbSXin Li .processor_start = processor_index,
394*2b54f0dbSXin Li .processor_count = 1,
395*2b54f0dbSXin Li .core_id = core_id,
396*2b54f0dbSXin Li .cluster = clusters + cluster_index,
397*2b54f0dbSXin Li .package = packages + package_index,
398*2b54f0dbSXin Li .vendor = x86_processor.vendor,
399*2b54f0dbSXin Li .uarch = x86_processor.uarch,
400*2b54f0dbSXin Li .cpuid = x86_processor.cpuid,
401*2b54f0dbSXin Li };
402*2b54f0dbSXin Li clusters[cluster_index].core_count += 1;
403*2b54f0dbSXin Li packages[package_index].core_count += 1;
404*2b54f0dbSXin Li last_apic_core_id = apid_core_id;
405*2b54f0dbSXin Li } else {
406*2b54f0dbSXin Li /* another logical processor on the same core */
407*2b54f0dbSXin Li cores[core_index].processor_count++;
408*2b54f0dbSXin Li }
409*2b54f0dbSXin Li
410*2b54f0dbSXin Li if (apic_cluster_id != last_apic_cluster_id) {
411*2b54f0dbSXin Li /* new cluster */
412*2b54f0dbSXin Li clusters[cluster_index].processor_start = processor_index;
413*2b54f0dbSXin Li clusters[cluster_index].processor_count = 1;
414*2b54f0dbSXin Li clusters[cluster_index].core_start = core_index;
415*2b54f0dbSXin Li clusters[cluster_index].cluster_id = cluster_id;
416*2b54f0dbSXin Li clusters[cluster_index].package = packages + package_index;
417*2b54f0dbSXin Li clusters[cluster_index].vendor = x86_processor.vendor;
418*2b54f0dbSXin Li clusters[cluster_index].uarch = x86_processor.uarch;
419*2b54f0dbSXin Li clusters[cluster_index].cpuid = x86_processor.cpuid;
420*2b54f0dbSXin Li packages[package_index].cluster_count += 1;
421*2b54f0dbSXin Li last_apic_cluster_id = apic_cluster_id;
422*2b54f0dbSXin Li } else {
423*2b54f0dbSXin Li /* another logical processor on the same cluster */
424*2b54f0dbSXin Li clusters[cluster_index].processor_count++;
425*2b54f0dbSXin Li }
426*2b54f0dbSXin Li
427*2b54f0dbSXin Li if (apic_package_id != last_apic_package_id) {
428*2b54f0dbSXin Li /* new package */
429*2b54f0dbSXin Li packages[package_index].processor_start = processor_index;
430*2b54f0dbSXin Li packages[package_index].processor_count = 1;
431*2b54f0dbSXin Li packages[package_index].core_start = core_index;
432*2b54f0dbSXin Li packages[package_index].cluster_start = cluster_index;
433*2b54f0dbSXin Li cpuinfo_x86_format_package_name(x86_processor.vendor, brand_string, packages[package_index].name);
434*2b54f0dbSXin Li last_apic_package_id = apic_package_id;
435*2b54f0dbSXin Li } else {
436*2b54f0dbSXin Li /* another logical processor on the same package */
437*2b54f0dbSXin Li packages[package_index].processor_count++;
438*2b54f0dbSXin Li }
439*2b54f0dbSXin Li
440*2b54f0dbSXin Li linux_cpu_to_processor_map[x86_linux_processors[i].linux_id] = processors + processor_index;
441*2b54f0dbSXin Li linux_cpu_to_core_map[x86_linux_processors[i].linux_id] = cores + core_index;
442*2b54f0dbSXin Li
443*2b54f0dbSXin Li if (x86_processor.cache.l1i.size != 0) {
444*2b54f0dbSXin Li const uint32_t l1i_id = apic_id & ~bit_mask(x86_processor.cache.l1i.apic_bits);
445*2b54f0dbSXin Li processors[i].cache.l1i = &l1i[l1i_index];
446*2b54f0dbSXin Li if (l1i_id != last_l1i_id) {
447*2b54f0dbSXin Li /* new cache */
448*2b54f0dbSXin Li last_l1i_id = l1i_id;
449*2b54f0dbSXin Li l1i[++l1i_index] = (struct cpuinfo_cache) {
450*2b54f0dbSXin Li .size = x86_processor.cache.l1i.size,
451*2b54f0dbSXin Li .associativity = x86_processor.cache.l1i.associativity,
452*2b54f0dbSXin Li .sets = x86_processor.cache.l1i.sets,
453*2b54f0dbSXin Li .partitions = x86_processor.cache.l1i.partitions,
454*2b54f0dbSXin Li .line_size = x86_processor.cache.l1i.line_size,
455*2b54f0dbSXin Li .flags = x86_processor.cache.l1i.flags,
456*2b54f0dbSXin Li .processor_start = processor_index,
457*2b54f0dbSXin Li .processor_count = 1,
458*2b54f0dbSXin Li };
459*2b54f0dbSXin Li } else {
460*2b54f0dbSXin Li /* another processor sharing the same cache */
461*2b54f0dbSXin Li l1i[l1i_index].processor_count += 1;
462*2b54f0dbSXin Li }
463*2b54f0dbSXin Li processors[i].cache.l1i = &l1i[l1i_index];
464*2b54f0dbSXin Li } else {
465*2b54f0dbSXin Li /* reset cache id */
466*2b54f0dbSXin Li last_l1i_id = UINT32_MAX;
467*2b54f0dbSXin Li }
468*2b54f0dbSXin Li if (x86_processor.cache.l1d.size != 0) {
469*2b54f0dbSXin Li const uint32_t l1d_id = apic_id & ~bit_mask(x86_processor.cache.l1d.apic_bits);
470*2b54f0dbSXin Li processors[i].cache.l1d = &l1d[l1d_index];
471*2b54f0dbSXin Li if (l1d_id != last_l1d_id) {
472*2b54f0dbSXin Li /* new cache */
473*2b54f0dbSXin Li last_l1d_id = l1d_id;
474*2b54f0dbSXin Li l1d[++l1d_index] = (struct cpuinfo_cache) {
475*2b54f0dbSXin Li .size = x86_processor.cache.l1d.size,
476*2b54f0dbSXin Li .associativity = x86_processor.cache.l1d.associativity,
477*2b54f0dbSXin Li .sets = x86_processor.cache.l1d.sets,
478*2b54f0dbSXin Li .partitions = x86_processor.cache.l1d.partitions,
479*2b54f0dbSXin Li .line_size = x86_processor.cache.l1d.line_size,
480*2b54f0dbSXin Li .flags = x86_processor.cache.l1d.flags,
481*2b54f0dbSXin Li .processor_start = processor_index,
482*2b54f0dbSXin Li .processor_count = 1,
483*2b54f0dbSXin Li };
484*2b54f0dbSXin Li } else {
485*2b54f0dbSXin Li /* another processor sharing the same cache */
486*2b54f0dbSXin Li l1d[l1d_index].processor_count += 1;
487*2b54f0dbSXin Li }
488*2b54f0dbSXin Li processors[i].cache.l1d = &l1d[l1d_index];
489*2b54f0dbSXin Li } else {
490*2b54f0dbSXin Li /* reset cache id */
491*2b54f0dbSXin Li last_l1d_id = UINT32_MAX;
492*2b54f0dbSXin Li }
493*2b54f0dbSXin Li if (x86_processor.cache.l2.size != 0) {
494*2b54f0dbSXin Li const uint32_t l2_id = apic_id & ~bit_mask(x86_processor.cache.l2.apic_bits);
495*2b54f0dbSXin Li processors[i].cache.l2 = &l2[l2_index];
496*2b54f0dbSXin Li if (l2_id != last_l2_id) {
497*2b54f0dbSXin Li /* new cache */
498*2b54f0dbSXin Li last_l2_id = l2_id;
499*2b54f0dbSXin Li l2[++l2_index] = (struct cpuinfo_cache) {
500*2b54f0dbSXin Li .size = x86_processor.cache.l2.size,
501*2b54f0dbSXin Li .associativity = x86_processor.cache.l2.associativity,
502*2b54f0dbSXin Li .sets = x86_processor.cache.l2.sets,
503*2b54f0dbSXin Li .partitions = x86_processor.cache.l2.partitions,
504*2b54f0dbSXin Li .line_size = x86_processor.cache.l2.line_size,
505*2b54f0dbSXin Li .flags = x86_processor.cache.l2.flags,
506*2b54f0dbSXin Li .processor_start = processor_index,
507*2b54f0dbSXin Li .processor_count = 1,
508*2b54f0dbSXin Li };
509*2b54f0dbSXin Li } else {
510*2b54f0dbSXin Li /* another processor sharing the same cache */
511*2b54f0dbSXin Li l2[l2_index].processor_count += 1;
512*2b54f0dbSXin Li }
513*2b54f0dbSXin Li processors[i].cache.l2 = &l2[l2_index];
514*2b54f0dbSXin Li } else {
515*2b54f0dbSXin Li /* reset cache id */
516*2b54f0dbSXin Li last_l2_id = UINT32_MAX;
517*2b54f0dbSXin Li }
518*2b54f0dbSXin Li if (x86_processor.cache.l3.size != 0) {
519*2b54f0dbSXin Li const uint32_t l3_id = apic_id & ~bit_mask(x86_processor.cache.l3.apic_bits);
520*2b54f0dbSXin Li processors[i].cache.l3 = &l3[l3_index];
521*2b54f0dbSXin Li if (l3_id != last_l3_id) {
522*2b54f0dbSXin Li /* new cache */
523*2b54f0dbSXin Li last_l3_id = l3_id;
524*2b54f0dbSXin Li l3[++l3_index] = (struct cpuinfo_cache) {
525*2b54f0dbSXin Li .size = x86_processor.cache.l3.size,
526*2b54f0dbSXin Li .associativity = x86_processor.cache.l3.associativity,
527*2b54f0dbSXin Li .sets = x86_processor.cache.l3.sets,
528*2b54f0dbSXin Li .partitions = x86_processor.cache.l3.partitions,
529*2b54f0dbSXin Li .line_size = x86_processor.cache.l3.line_size,
530*2b54f0dbSXin Li .flags = x86_processor.cache.l3.flags,
531*2b54f0dbSXin Li .processor_start = processor_index,
532*2b54f0dbSXin Li .processor_count = 1,
533*2b54f0dbSXin Li };
534*2b54f0dbSXin Li } else {
535*2b54f0dbSXin Li /* another processor sharing the same cache */
536*2b54f0dbSXin Li l3[l3_index].processor_count += 1;
537*2b54f0dbSXin Li }
538*2b54f0dbSXin Li processors[i].cache.l3 = &l3[l3_index];
539*2b54f0dbSXin Li } else {
540*2b54f0dbSXin Li /* reset cache id */
541*2b54f0dbSXin Li last_l3_id = UINT32_MAX;
542*2b54f0dbSXin Li }
543*2b54f0dbSXin Li if (x86_processor.cache.l4.size != 0) {
544*2b54f0dbSXin Li const uint32_t l4_id = apic_id & ~bit_mask(x86_processor.cache.l4.apic_bits);
545*2b54f0dbSXin Li processors[i].cache.l4 = &l4[l4_index];
546*2b54f0dbSXin Li if (l4_id != last_l4_id) {
547*2b54f0dbSXin Li /* new cache */
548*2b54f0dbSXin Li last_l4_id = l4_id;
549*2b54f0dbSXin Li l4[++l4_index] = (struct cpuinfo_cache) {
550*2b54f0dbSXin Li .size = x86_processor.cache.l4.size,
551*2b54f0dbSXin Li .associativity = x86_processor.cache.l4.associativity,
552*2b54f0dbSXin Li .sets = x86_processor.cache.l4.sets,
553*2b54f0dbSXin Li .partitions = x86_processor.cache.l4.partitions,
554*2b54f0dbSXin Li .line_size = x86_processor.cache.l4.line_size,
555*2b54f0dbSXin Li .flags = x86_processor.cache.l4.flags,
556*2b54f0dbSXin Li .processor_start = processor_index,
557*2b54f0dbSXin Li .processor_count = 1,
558*2b54f0dbSXin Li };
559*2b54f0dbSXin Li } else {
560*2b54f0dbSXin Li /* another processor sharing the same cache */
561*2b54f0dbSXin Li l4[l4_index].processor_count += 1;
562*2b54f0dbSXin Li }
563*2b54f0dbSXin Li processors[i].cache.l4 = &l4[l4_index];
564*2b54f0dbSXin Li } else {
565*2b54f0dbSXin Li /* reset cache id */
566*2b54f0dbSXin Li last_l4_id = UINT32_MAX;
567*2b54f0dbSXin Li }
568*2b54f0dbSXin Li }
569*2b54f0dbSXin Li }
570*2b54f0dbSXin Li
571*2b54f0dbSXin Li /* Commit changes */
572*2b54f0dbSXin Li cpuinfo_processors = processors;
573*2b54f0dbSXin Li cpuinfo_cores = cores;
574*2b54f0dbSXin Li cpuinfo_clusters = clusters;
575*2b54f0dbSXin Li cpuinfo_packages = packages;
576*2b54f0dbSXin Li cpuinfo_cache[cpuinfo_cache_level_1i] = l1i;
577*2b54f0dbSXin Li cpuinfo_cache[cpuinfo_cache_level_1d] = l1d;
578*2b54f0dbSXin Li cpuinfo_cache[cpuinfo_cache_level_2] = l2;
579*2b54f0dbSXin Li cpuinfo_cache[cpuinfo_cache_level_3] = l3;
580*2b54f0dbSXin Li cpuinfo_cache[cpuinfo_cache_level_4] = l4;
581*2b54f0dbSXin Li
582*2b54f0dbSXin Li cpuinfo_processors_count = processors_count;
583*2b54f0dbSXin Li cpuinfo_cores_count = cores_count;
584*2b54f0dbSXin Li cpuinfo_clusters_count = clusters_count;
585*2b54f0dbSXin Li cpuinfo_packages_count = packages_count;
586*2b54f0dbSXin Li cpuinfo_cache_count[cpuinfo_cache_level_1i] = l1i_count;
587*2b54f0dbSXin Li cpuinfo_cache_count[cpuinfo_cache_level_1d] = l1d_count;
588*2b54f0dbSXin Li cpuinfo_cache_count[cpuinfo_cache_level_2] = l2_count;
589*2b54f0dbSXin Li cpuinfo_cache_count[cpuinfo_cache_level_3] = l3_count;
590*2b54f0dbSXin Li cpuinfo_cache_count[cpuinfo_cache_level_4] = l4_count;
591*2b54f0dbSXin Li cpuinfo_max_cache_size = cpuinfo_compute_max_cache_size(&processors[0]);
592*2b54f0dbSXin Li
593*2b54f0dbSXin Li cpuinfo_global_uarch = (struct cpuinfo_uarch_info) {
594*2b54f0dbSXin Li .uarch = x86_processor.uarch,
595*2b54f0dbSXin Li .cpuid = x86_processor.cpuid,
596*2b54f0dbSXin Li .processor_count = processors_count,
597*2b54f0dbSXin Li .core_count = cores_count,
598*2b54f0dbSXin Li };
599*2b54f0dbSXin Li
600*2b54f0dbSXin Li cpuinfo_linux_cpu_max = x86_linux_processors_count;
601*2b54f0dbSXin Li cpuinfo_linux_cpu_to_processor_map = linux_cpu_to_processor_map;
602*2b54f0dbSXin Li cpuinfo_linux_cpu_to_core_map = linux_cpu_to_core_map;
603*2b54f0dbSXin Li
604*2b54f0dbSXin Li __sync_synchronize();
605*2b54f0dbSXin Li
606*2b54f0dbSXin Li cpuinfo_is_initialized = true;
607*2b54f0dbSXin Li
608*2b54f0dbSXin Li processors = NULL;
609*2b54f0dbSXin Li cores = NULL;
610*2b54f0dbSXin Li clusters = NULL;
611*2b54f0dbSXin Li packages = NULL;
612*2b54f0dbSXin Li l1i = l1d = l2 = l3 = l4 = NULL;
613*2b54f0dbSXin Li linux_cpu_to_processor_map = NULL;
614*2b54f0dbSXin Li linux_cpu_to_core_map = NULL;
615*2b54f0dbSXin Li
616*2b54f0dbSXin Li cleanup:
617*2b54f0dbSXin Li free(x86_linux_processors);
618*2b54f0dbSXin Li free(processors);
619*2b54f0dbSXin Li free(cores);
620*2b54f0dbSXin Li free(clusters);
621*2b54f0dbSXin Li free(packages);
622*2b54f0dbSXin Li free(l1i);
623*2b54f0dbSXin Li free(l1d);
624*2b54f0dbSXin Li free(l2);
625*2b54f0dbSXin Li free(l3);
626*2b54f0dbSXin Li free(l4);
627*2b54f0dbSXin Li free(linux_cpu_to_processor_map);
628*2b54f0dbSXin Li free(linux_cpu_to_core_map);
629*2b54f0dbSXin Li }
630