1*2b54f0dbSXin Li #include <stdbool.h>
2*2b54f0dbSXin Li #include <stdint.h>
3*2b54f0dbSXin Li #include <stddef.h>
4*2b54f0dbSXin Li #include <stdio.h>
5*2b54f0dbSXin Li #include <string.h>
6*2b54f0dbSXin Li
7*2b54f0dbSXin Li #include <cpuinfo.h>
8*2b54f0dbSXin Li #include <cpuinfo/common.h>
9*2b54f0dbSXin Li #include <x86/api.h>
10*2b54f0dbSXin Li
11*2b54f0dbSXin Li
/*
 * Parser state carried over from one brand-string token to the next.
 * The context_* pointers describe the token handled immediately before the current one;
 * the remaining members accumulate facts about the brand string as a whole.
 */
struct parser_state {
	/* Start of the preceding token when that token was "model"; used to erase "model unknown". */
	char* context_model;
	/*
	 * Start of the preceding token when that token was a single uppercase letter;
	 * used to merge strings such as "X 990" into "990X".
	 */
	char* context_upper_letter;
	/* Start of the preceding token when that token was "Dual"; used to erase "Dual Core". */
	char* context_dual;
	/*
	 * Non-NULL when the preceding token was a core-count marker ("Core" after "Dual", "Dual-Core",
	 * "QuadCore", etc.). The parser stores the position just past that token.
	 */
	char* context_core;
	/*
	 * Start of an "Eng" or "Engineering" token; consulted when a following "Sample" token
	 * is processed to detect engineering samples.
	 */
	char* context_engineering;
	/* Position of the '@' character (introduces the frequency part); NULL when the string has no '@'. */
	char* frequency_separator;
	/* Set when some token looks like a frequency, i.e. ends in "KHz", "MHz" or "GHz". */
	bool frequency_token;
	/* Set when a "Xeon" token was encountered. */
	bool xeon;
	/* Set once a token containing a model number has been processed (tracked only for strings with '@'). */
	bool parsed_model_number;
	/* Set when "Eng"/"Engineering" is followed by "Sample", "Sample," or "Sample:". */
	bool engineering_sample;
};
53*2b54f0dbSXin Li
54*2b54f0dbSXin Li /** @brief Resets information about the previous token. Keeps all other state information. */
reset_context(struct parser_state * state)55*2b54f0dbSXin Li static void reset_context(struct parser_state* state) {
56*2b54f0dbSXin Li state->context_model = NULL;
57*2b54f0dbSXin Li state->context_upper_letter = NULL;
58*2b54f0dbSXin Li state->context_dual = NULL;
59*2b54f0dbSXin Li state->context_core = NULL;
60*2b54f0dbSXin Li }
61*2b54f0dbSXin Li
/**
 * @brief Blanks out a character range (fills it with spaces) when it equals a reference string.
 * @param string Start of the range to inspect; overwritten with spaces on a match.
 * @param length Number of characters to compare and, on a match, to overwrite.
 * @param target Reference string; its first @p length characters take part in the comparison.
 * @retval true If the range matched and was replaced by space characters.
 * @retval false If the range differs from the reference; the range is left unchanged.
 */
static inline bool erase_matching(char* string, size_t length, const char* target) {
	if (memcmp(string, target, length) != 0) {
		return false;
	}
	memset(string, ' ', length);
	return true;
}
77*2b54f0dbSXin Li
/**
 * @brief Tells whether an ASCII character is an uppercase latin letter.
 * @param character The character to classify.
 * @retval true If the character lies in the range 'A' to 'Z'.
 * @retval false For any other character.
 */
static inline bool is_upper_letter(char character) {
	return character >= 'A' && character <= 'Z';
}
87*2b54f0dbSXin Li
/**
 * @brief Tells whether an ASCII character is a decimal digit.
 * @param character The character to classify.
 * @retval true If the character lies in the range '0' to '9'.
 * @retval false For any other character.
 */
static inline bool is_digit(char character) {
	return character >= '0' && character <= '9';
}
97*2b54f0dbSXin Li
/**
 * @brief Tells whether a token consists solely of '0' characters.
 * @param token_start Pointer to the first character of the token.
 * @param token_end Pointer one past the last character of the token.
 * @retval true If every character in the range is '0' (an empty range also qualifies).
 * @retval false If the range contains any other character.
 */
static inline bool is_zero_number(const char* token_start, const char* token_end) {
	const char* cursor = token_start;
	/* Skip the leading run of zeroes; the token qualifies only if that run reaches the end. */
	while (cursor != token_end && *cursor == '0') {
		cursor++;
	}
	return cursor == token_end;
}
106*2b54f0dbSXin Li
/**
 * @brief Tells whether a character range consists solely of space characters.
 * @param token_start Pointer to the first character of the range.
 * @param token_end Pointer one past the last character of the range.
 * @retval true If every character in the range is ' ' (an empty range also qualifies).
 * @retval false If the range contains any other character.
 */
static inline bool is_space(const char* token_start, const char* token_end) {
	const char* cursor = token_start;
	/* Skip the leading run of spaces; the range qualifies only if that run reaches the end. */
	while (cursor != token_end && *cursor == ' ') {
		cursor++;
	}
	return cursor == token_end;
}
115*2b54f0dbSXin Li
/**
 * @brief Tells whether a token consists solely of decimal digits.
 * @param token_start Pointer to the first character of the token.
 * @param token_end Pointer one past the last character of the token.
 * @retval true If every character in the range is a digit (an empty range also qualifies).
 * @retval false If the range contains any non-digit character.
 */
static inline bool is_number(const char* token_start, const char* token_end) {
	const char* cursor = token_start;
	/* Skip the leading run of digits; the token qualifies only if that run reaches the end. */
	while (cursor != token_end && is_digit(*cursor)) {
		cursor++;
	}
	return cursor == token_end;
}
124*2b54f0dbSXin Li
/**
 * @brief Tells whether a token looks like a model number, i.e. contains two adjacent digits.
 * @param token_start Pointer to the first character of the token.
 * @param token_end Pointer one past the last character of the token.
 * @retval true If some pair of neighbouring characters in the token are both digits.
 * @retval false If no such pair exists (always the case for tokens shorter than two characters).
 */
static inline bool is_model_number(const char* token_start, const char* token_end) {
	bool previous_is_digit = false;
	for (const char* cursor = token_start; cursor < token_end; cursor++) {
		const bool current_is_digit = is_digit(*cursor);
		if (previous_is_digit && current_is_digit) {
			return true;
		}
		previous_is_digit = current_is_digit;
	}
	return false;
}
133*2b54f0dbSXin Li
/**
 * @brief Tells whether a token looks like a frequency specification such as "3.47GHz" or "800MHz".
 * @param token_start Pointer to the first character of the token.
 * @param token_end Pointer one past the last character of the token.
 * @retval true If the token is longer than three characters and ends in "KHz", "MHz" or "GHz".
 * @retval false Otherwise (a bare unit such as "GHz" does not count).
 */
static inline bool is_frequency(const char* token_start, const char* token_end) {
	/* The unit suffix alone takes three characters; something must precede it. */
	if ((size_t) (token_end - token_start) <= 3) {
		return false;
	}
	if (token_end[-2] != 'H' || token_end[-1] != 'z') {
		return false;
	}
	const char magnitude = token_end[-3];
	return magnitude == 'K' || magnitude == 'M' || magnitude == 'G';
}
146*2b54f0dbSXin Li
/**
 * @brief Copies a token to a new location and reports where the copy ends.
 * @param token_start Pointer to the first character of the token.
 * @param token_end Pointer one past the last character of the token.
 * @param output_ptr Destination for the first character of the token.
 * @return Pointer one past the last character written at the destination.
 * @warning Input and output tokens can overlap, hence memmove rather than memcpy.
 */
static inline char* move_token(const char* token_start, const char* token_end, char* output_ptr) {
	const size_t count = (size_t) (token_end - token_start);
	char* const output_end = output_ptr + count;
	memmove(output_ptr, token_start, count);
	return output_end;
}
155*2b54f0dbSXin Li
transform_token(char * token_start,char * token_end,struct parser_state * state)156*2b54f0dbSXin Li static bool transform_token(char* token_start, char* token_end, struct parser_state* state) {
157*2b54f0dbSXin Li const struct parser_state previousState = *state;
158*2b54f0dbSXin Li reset_context(state);
159*2b54f0dbSXin Li
160*2b54f0dbSXin Li size_t token_length = (size_t) (token_end - token_start);
161*2b54f0dbSXin Li
162*2b54f0dbSXin Li if (state->frequency_separator != NULL) {
163*2b54f0dbSXin Li if (token_start > state->frequency_separator) {
164*2b54f0dbSXin Li if (state->parsed_model_number) {
165*2b54f0dbSXin Li memset(token_start, ' ', token_length);
166*2b54f0dbSXin Li }
167*2b54f0dbSXin Li }
168*2b54f0dbSXin Li }
169*2b54f0dbSXin Li
170*2b54f0dbSXin Li
171*2b54f0dbSXin Li /* Early AMD and Cyrix processors have "tm" suffix for trademark, e.g.
172*2b54f0dbSXin Li * "AMD-K6tm w/ multimedia extensions"
173*2b54f0dbSXin Li * "Cyrix MediaGXtm MMXtm Enhanced"
174*2b54f0dbSXin Li */
175*2b54f0dbSXin Li if (token_length > 2) {
176*2b54f0dbSXin Li const char context_char = token_end[-3];
177*2b54f0dbSXin Li if (is_digit(context_char) || is_upper_letter(context_char)) {
178*2b54f0dbSXin Li if (erase_matching(token_end - 2, 2, "tm")) {
179*2b54f0dbSXin Li token_end -= 2;
180*2b54f0dbSXin Li token_length -= 2;
181*2b54f0dbSXin Li }
182*2b54f0dbSXin Li }
183*2b54f0dbSXin Li }
184*2b54f0dbSXin Li if (token_length > 4) {
185*2b54f0dbSXin Li /* Some early AMD CPUs have "AMD-" at the beginning, e.g.
186*2b54f0dbSXin Li * "AMD-K5(tm) Processor"
187*2b54f0dbSXin Li * "AMD-K6tm w/ multimedia extensions"
188*2b54f0dbSXin Li * "AMD-K6(tm) 3D+ Processor"
189*2b54f0dbSXin Li * "AMD-K6(tm)-III Processor"
190*2b54f0dbSXin Li */
191*2b54f0dbSXin Li if (erase_matching(token_start, 4, "AMD-")) {
192*2b54f0dbSXin Li token_start += 4;
193*2b54f0dbSXin Li token_length -= 4;
194*2b54f0dbSXin Li }
195*2b54f0dbSXin Li }
196*2b54f0dbSXin Li switch (token_length) {
197*2b54f0dbSXin Li case 1:
198*2b54f0dbSXin Li /*
199*2b54f0dbSXin Li * On some Intel processors there is a space between the first letter of
200*2b54f0dbSXin Li * the name and the number after it, e.g.
201*2b54f0dbSXin Li * "Intel(R) Core(TM) i7 CPU X 990 @ 3.47GHz"
202*2b54f0dbSXin Li * "Intel(R) Core(TM) CPU Q 820 @ 1.73GHz"
203*2b54f0dbSXin Li * We want to merge these parts together, in reverse order, i.e. "X 990" -> "990X", "820" -> "820Q"
204*2b54f0dbSXin Li */
205*2b54f0dbSXin Li if (is_upper_letter(token_start[0])) {
206*2b54f0dbSXin Li state->context_upper_letter = token_start;
207*2b54f0dbSXin Li return true;
208*2b54f0dbSXin Li }
209*2b54f0dbSXin Li break;
210*2b54f0dbSXin Li case 2:
211*2b54f0dbSXin Li /* Erase everything after "w/" in "AMD-K6tm w/ multimedia extensions" */
212*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "w/")) {
213*2b54f0dbSXin Li return false;
214*2b54f0dbSXin Li }
215*2b54f0dbSXin Li /*
216*2b54f0dbSXin Li * Intel Xeon processors since Ivy Bridge use versions, e.g.
217*2b54f0dbSXin Li * "Intel Xeon E3-1230 v2"
218*2b54f0dbSXin Li * Some processor branch strings report them as "V<N>", others report as "v<N>".
219*2b54f0dbSXin Li * Normalize the former (upper-case) to the latter (lower-case) version
220*2b54f0dbSXin Li */
221*2b54f0dbSXin Li if (token_start[0] == 'V' && is_digit(token_start[1])) {
222*2b54f0dbSXin Li token_start[0] = 'v';
223*2b54f0dbSXin Li return true;
224*2b54f0dbSXin Li }
225*2b54f0dbSXin Li break;
226*2b54f0dbSXin Li case 3:
227*2b54f0dbSXin Li /*
228*2b54f0dbSXin Li * Erase "CPU" in brand string on Intel processors, e.g.
229*2b54f0dbSXin Li * "Intel(R) Core(TM) i5 CPU 650 @ 3.20GHz"
230*2b54f0dbSXin Li * "Intel(R) Xeon(R) CPU X3210 @ 2.13GHz"
231*2b54f0dbSXin Li * "Intel(R) Atom(TM) CPU Z2760 @ 1.80GHz"
232*2b54f0dbSXin Li */
233*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "CPU")) {
234*2b54f0dbSXin Li return true;
235*2b54f0dbSXin Li }
236*2b54f0dbSXin Li /*
237*2b54f0dbSXin Li * Erase everything after "SOC" on AMD System-on-Chips, e.g.
238*2b54f0dbSXin Li * "AMD GX-212JC SOC with Radeon(TM) R2E Graphics \0"
239*2b54f0dbSXin Li */
240*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "SOC")) {
241*2b54f0dbSXin Li return false;
242*2b54f0dbSXin Li }
243*2b54f0dbSXin Li /*
244*2b54f0dbSXin Li * Erase "AMD" in brand string on AMD processors, e.g.
245*2b54f0dbSXin Li * "AMD Athlon(tm) Processor"
246*2b54f0dbSXin Li * "AMD Engineering Sample"
247*2b54f0dbSXin Li * "Quad-Core AMD Opteron(tm) Processor 2344 HE"
248*2b54f0dbSXin Li */
249*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "AMD")) {
250*2b54f0dbSXin Li return true;
251*2b54f0dbSXin Li }
252*2b54f0dbSXin Li /*
253*2b54f0dbSXin Li * Erase "VIA" in brand string on VIA processors, e.g.
254*2b54f0dbSXin Li * "VIA C3 Ezra"
255*2b54f0dbSXin Li * "VIA C7-M Processor 1200MHz"
256*2b54f0dbSXin Li * "VIA Nano L3050@1800MHz"
257*2b54f0dbSXin Li */
258*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "VIA")) {
259*2b54f0dbSXin Li return true;
260*2b54f0dbSXin Li }
261*2b54f0dbSXin Li /* Erase "IDT" in brand string on early Centaur processors, e.g. "IDT WinChip 2-3D" */
262*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "IDT")) {
263*2b54f0dbSXin Li return true;
264*2b54f0dbSXin Li }
265*2b54f0dbSXin Li /*
266*2b54f0dbSXin Li * Erase everything starting with "MMX" in
267*2b54f0dbSXin Li * "Cyrix MediaGXtm MMXtm Enhanced" ("tm" suffix is removed by this point)
268*2b54f0dbSXin Li */
269*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "MMX")) {
270*2b54f0dbSXin Li return false;
271*2b54f0dbSXin Li }
272*2b54f0dbSXin Li /*
273*2b54f0dbSXin Li * Erase everything starting with "APU" on AMD processors, e.g.
274*2b54f0dbSXin Li * "AMD A10-4600M APU with Radeon(tm) HD Graphics"
275*2b54f0dbSXin Li * "AMD A10-7850K APU with Radeon(TM) R7 Graphics"
276*2b54f0dbSXin Li * "AMD A6-6310 APU with AMD Radeon R4 Graphics"
277*2b54f0dbSXin Li */
278*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "APU")) {
279*2b54f0dbSXin Li return false;
280*2b54f0dbSXin Li }
281*2b54f0dbSXin Li /*
282*2b54f0dbSXin Li * Remember to discard string if it contains "Eng Sample",
283*2b54f0dbSXin Li * e.g. "Eng Sample, ZD302046W4K43_36/30/20_2/8_A"
284*2b54f0dbSXin Li */
285*2b54f0dbSXin Li if (memcmp(token_start, "Eng", token_length) == 0) {
286*2b54f0dbSXin Li state->context_engineering = token_start;
287*2b54f0dbSXin Li }
288*2b54f0dbSXin Li break;
289*2b54f0dbSXin Li case 4:
290*2b54f0dbSXin Li /* Remember to erase "Dual Core" in "AMD Athlon(tm) 64 X2 Dual Core Processor 3800+" */
291*2b54f0dbSXin Li if (memcmp(token_start, "Dual", token_length) == 0) {
292*2b54f0dbSXin Li state->context_dual = token_start;
293*2b54f0dbSXin Li }
294*2b54f0dbSXin Li /* Remember if the processor is on Xeon family */
295*2b54f0dbSXin Li if (memcmp(token_start, "Xeon", token_length) == 0) {
296*2b54f0dbSXin Li state->xeon = true;
297*2b54f0dbSXin Li }
298*2b54f0dbSXin Li /* Erase "Dual Core" in "AMD Athlon(tm) 64 X2 Dual Core Processor 3800+" */
299*2b54f0dbSXin Li if (previousState.context_dual != NULL) {
300*2b54f0dbSXin Li if (memcmp(token_start, "Core", token_length) == 0) {
301*2b54f0dbSXin Li memset(previousState.context_dual, ' ', (size_t) (token_end - previousState.context_dual));
302*2b54f0dbSXin Li state->context_core = token_end;
303*2b54f0dbSXin Li return true;
304*2b54f0dbSXin Li }
305*2b54f0dbSXin Li }
306*2b54f0dbSXin Li break;
307*2b54f0dbSXin Li case 5:
308*2b54f0dbSXin Li /*
309*2b54f0dbSXin Li * Erase "Intel" in brand string on Intel processors, e.g.
310*2b54f0dbSXin Li * "Intel(R) Xeon(R) CPU X3210 @ 2.13GHz"
311*2b54f0dbSXin Li * "Intel(R) Atom(TM) CPU D2700 @ 2.13GHz"
312*2b54f0dbSXin Li * "Genuine Intel(R) processor 800MHz"
313*2b54f0dbSXin Li */
314*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "Intel")) {
315*2b54f0dbSXin Li return true;
316*2b54f0dbSXin Li }
317*2b54f0dbSXin Li /*
318*2b54f0dbSXin Li * Erase "Cyrix" in brand string on Cyrix processors, e.g.
319*2b54f0dbSXin Li * "Cyrix MediaGXtm MMXtm Enhanced"
320*2b54f0dbSXin Li */
321*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "Cyrix")) {
322*2b54f0dbSXin Li return true;
323*2b54f0dbSXin Li }
324*2b54f0dbSXin Li /*
325*2b54f0dbSXin Li * Erase everything following "Geode" (but not "Geode" token itself) on Geode processors, e.g.
326*2b54f0dbSXin Li * "Geode(TM) Integrated Processor by AMD PCS"
327*2b54f0dbSXin Li * "Geode(TM) Integrated Processor by National Semi"
328*2b54f0dbSXin Li */
329*2b54f0dbSXin Li if (memcmp(token_start, "Geode", token_length) == 0) {
330*2b54f0dbSXin Li return false;
331*2b54f0dbSXin Li }
332*2b54f0dbSXin Li /* Remember to erase "model unknown" in "AMD Processor model unknown" */
333*2b54f0dbSXin Li if (memcmp(token_start, "model", token_length) == 0) {
334*2b54f0dbSXin Li state->context_model = token_start;
335*2b54f0dbSXin Li return true;
336*2b54f0dbSXin Li }
337*2b54f0dbSXin Li break;
338*2b54f0dbSXin Li case 6:
339*2b54f0dbSXin Li /*
340*2b54f0dbSXin Li * Erase everything starting with "Radeon" or "RADEON" on AMD APUs, e.g.
341*2b54f0dbSXin Li * "A8-7670K Radeon R7, 10 Compute Cores 4C+6G"
342*2b54f0dbSXin Li * "FX-8800P Radeon R7, 12 Compute Cores 4C+8G"
343*2b54f0dbSXin Li * "A12-9800 RADEON R7, 12 COMPUTE CORES 4C+8G"
344*2b54f0dbSXin Li * "A9-9410 RADEON R5, 5 COMPUTE CORES 2C+3G"
345*2b54f0dbSXin Li */
346*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "Radeon") || erase_matching(token_start, token_length, "RADEON")) {
347*2b54f0dbSXin Li return false;
348*2b54f0dbSXin Li }
349*2b54f0dbSXin Li /*
350*2b54f0dbSXin Li * Erase "Mobile" when it is not part of the processor name,
351*2b54f0dbSXin Li * e.g. in "AMD Turion(tm) X2 Ultra Dual-Core Mobile ZM-82"
352*2b54f0dbSXin Li */
353*2b54f0dbSXin Li if (previousState.context_core != NULL) {
354*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "Mobile")) {
355*2b54f0dbSXin Li return true;
356*2b54f0dbSXin Li }
357*2b54f0dbSXin Li }
358*2b54f0dbSXin Li /* Erase "family" in "Intel(R) Pentium(R) III CPU family 1266MHz" */
359*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "family")) {
360*2b54f0dbSXin Li return true;
361*2b54f0dbSXin Li }
362*2b54f0dbSXin Li /* Discard the string if it contains "Engineering Sample" */
363*2b54f0dbSXin Li if (previousState.context_engineering != NULL) {
364*2b54f0dbSXin Li if (memcmp(token_start, "Sample", token_length) == 0) {
365*2b54f0dbSXin Li state->engineering_sample = true;
366*2b54f0dbSXin Li return false;
367*2b54f0dbSXin Li }
368*2b54f0dbSXin Li }
369*2b54f0dbSXin Li break;
370*2b54f0dbSXin Li case 7:
371*2b54f0dbSXin Li /*
372*2b54f0dbSXin Li * Erase "Geniune" in brand string on Intel engineering samples, e.g.
373*2b54f0dbSXin Li * "Genuine Intel(R) processor 800MHz"
374*2b54f0dbSXin Li * "Genuine Intel(R) CPU @ 2.13GHz"
375*2b54f0dbSXin Li * "Genuine Intel(R) CPU 0000 @ 1.73GHz"
376*2b54f0dbSXin Li */
377*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "Genuine")) {
378*2b54f0dbSXin Li return true;
379*2b54f0dbSXin Li }
380*2b54f0dbSXin Li /*
381*2b54f0dbSXin Li * Erase "12-core" in brand string on AMD Threadripper, e.g.
382*2b54f0dbSXin Li * "AMD Ryzen Threadripper 1920X 12-Core Processor"
383*2b54f0dbSXin Li */
384*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "12-Core")) {
385*2b54f0dbSXin Li return true;
386*2b54f0dbSXin Li }
387*2b54f0dbSXin Li /*
388*2b54f0dbSXin Li * Erase "16-core" in brand string on AMD Threadripper, e.g.
389*2b54f0dbSXin Li * "AMD Ryzen Threadripper 1950X 16-Core Processor"
390*2b54f0dbSXin Li */
391*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "16-Core")) {
392*2b54f0dbSXin Li return true;
393*2b54f0dbSXin Li }
394*2b54f0dbSXin Li /* Erase "model unknown" in "AMD Processor model unknown" */
395*2b54f0dbSXin Li if (previousState.context_model != NULL) {
396*2b54f0dbSXin Li if (memcmp(token_start, "unknown", token_length) == 0) {
397*2b54f0dbSXin Li memset(previousState.context_model, ' ', token_end - previousState.context_model);
398*2b54f0dbSXin Li return true;
399*2b54f0dbSXin Li }
400*2b54f0dbSXin Li }
401*2b54f0dbSXin Li /*
402*2b54f0dbSXin Li * Discard the string if it contains "Eng Sample:" or "Eng Sample," e.g.
403*2b54f0dbSXin Li * "AMD Eng Sample, ZD302046W4K43_36/30/20_2/8_A"
404*2b54f0dbSXin Li * "AMD Eng Sample: 2D3151A2M88E4_35/31_N"
405*2b54f0dbSXin Li */
406*2b54f0dbSXin Li if (previousState.context_engineering != NULL) {
407*2b54f0dbSXin Li if (memcmp(token_start, "Sample,", token_length) == 0 || memcmp(token_start, "Sample:", token_length) == 0) {
408*2b54f0dbSXin Li state->engineering_sample = true;
409*2b54f0dbSXin Li return false;
410*2b54f0dbSXin Li }
411*2b54f0dbSXin Li }
412*2b54f0dbSXin Li break;
413*2b54f0dbSXin Li case 8:
414*2b54f0dbSXin Li /* Erase "QuadCore" in "VIA QuadCore L4700 @ 1.2+ GHz" */
415*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "QuadCore")) {
416*2b54f0dbSXin Li state->context_core = token_end;
417*2b54f0dbSXin Li return true;
418*2b54f0dbSXin Li }
419*2b54f0dbSXin Li /* Erase "Six-Core" in "AMD FX(tm)-6100 Six-Core Processor" */
420*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "Six-Core")) {
421*2b54f0dbSXin Li state->context_core = token_end;
422*2b54f0dbSXin Li return true;
423*2b54f0dbSXin Li }
424*2b54f0dbSXin Li break;
425*2b54f0dbSXin Li case 9:
426*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "Processor")) {
427*2b54f0dbSXin Li return true;
428*2b54f0dbSXin Li }
429*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "processor")) {
430*2b54f0dbSXin Li return true;
431*2b54f0dbSXin Li }
432*2b54f0dbSXin Li /* Erase "Dual-Core" in "Pentium(R) Dual-Core CPU T4200 @ 2.00GHz" */
433*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "Dual-Core")) {
434*2b54f0dbSXin Li state->context_core = token_end;
435*2b54f0dbSXin Li return true;
436*2b54f0dbSXin Li }
437*2b54f0dbSXin Li /* Erase "Quad-Core" in AMD processors, e.g.
438*2b54f0dbSXin Li * "Quad-Core AMD Opteron(tm) Processor 2347 HE"
439*2b54f0dbSXin Li * "AMD FX(tm)-4170 Quad-Core Processor"
440*2b54f0dbSXin Li */
441*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "Quad-Core")) {
442*2b54f0dbSXin Li state->context_core = token_end;
443*2b54f0dbSXin Li return true;
444*2b54f0dbSXin Li }
445*2b54f0dbSXin Li /* Erase "Transmeta" in brand string on Transmeta processors, e.g.
446*2b54f0dbSXin Li * "Transmeta(tm) Crusoe(tm) Processor TM5800"
447*2b54f0dbSXin Li * "Transmeta Efficeon(tm) Processor TM8000"
448*2b54f0dbSXin Li */
449*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "Transmeta")) {
450*2b54f0dbSXin Li return true;
451*2b54f0dbSXin Li }
452*2b54f0dbSXin Li break;
453*2b54f0dbSXin Li case 10:
454*2b54f0dbSXin Li /*
455*2b54f0dbSXin Li * Erase "Eight-Core" in AMD processors, e.g.
456*2b54f0dbSXin Li * "AMD FX(tm)-8150 Eight-Core Processor"
457*2b54f0dbSXin Li */
458*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "Eight-Core")) {
459*2b54f0dbSXin Li state->context_core = token_end;
460*2b54f0dbSXin Li return true;
461*2b54f0dbSXin Li }
462*2b54f0dbSXin Li break;
463*2b54f0dbSXin Li case 11:
464*2b54f0dbSXin Li /*
465*2b54f0dbSXin Li * Erase "Triple-Core" in AMD processors, e.g.
466*2b54f0dbSXin Li * "AMD Phenom(tm) II N830 Triple-Core Processor"
467*2b54f0dbSXin Li * "AMD Phenom(tm) 8650 Triple-Core Processor"
468*2b54f0dbSXin Li */
469*2b54f0dbSXin Li if (erase_matching(token_start, token_length, "Triple-Core")) {
470*2b54f0dbSXin Li state->context_core = token_end;
471*2b54f0dbSXin Li return true;
472*2b54f0dbSXin Li }
473*2b54f0dbSXin Li /*
474*2b54f0dbSXin Li * Remember to discard string if it contains "Engineering Sample",
475*2b54f0dbSXin Li * e.g. "AMD Engineering Sample"
476*2b54f0dbSXin Li */
477*2b54f0dbSXin Li if (memcmp(token_start, "Engineering", token_length) == 0) {
478*2b54f0dbSXin Li state->context_engineering = token_start;
479*2b54f0dbSXin Li return true;
480*2b54f0dbSXin Li }
481*2b54f0dbSXin Li break;
482*2b54f0dbSXin Li }
483*2b54f0dbSXin Li if (is_zero_number(token_start, token_end)) {
484*2b54f0dbSXin Li memset(token_start, ' ', token_length);
485*2b54f0dbSXin Li return true;
486*2b54f0dbSXin Li }
487*2b54f0dbSXin Li /* On some Intel processors the last letter of the name is put before the number,
488*2b54f0dbSXin Li * and an additional space it added, e.g.
489*2b54f0dbSXin Li * "Intel(R) Core(TM) i7 CPU X 990 @ 3.47GHz"
490*2b54f0dbSXin Li * "Intel(R) Core(TM) CPU Q 820 @ 1.73GHz"
491*2b54f0dbSXin Li * "Intel(R) Core(TM) i5 CPU M 480 @ 2.67GHz"
492*2b54f0dbSXin Li * We fix this issue, i.e. "X 990" -> "990X", "Q 820" -> "820Q"
493*2b54f0dbSXin Li */
494*2b54f0dbSXin Li if (previousState.context_upper_letter != 0) {
495*2b54f0dbSXin Li /* A single letter token followed by 2-to-5 digit letter is merged together */
496*2b54f0dbSXin Li switch (token_length) {
497*2b54f0dbSXin Li case 2:
498*2b54f0dbSXin Li case 3:
499*2b54f0dbSXin Li case 4:
500*2b54f0dbSXin Li case 5:
501*2b54f0dbSXin Li if (is_number(token_start, token_end)) {
502*2b54f0dbSXin Li /* Load the previous single-letter token */
503*2b54f0dbSXin Li const char letter = *previousState.context_upper_letter;
504*2b54f0dbSXin Li /* Erase the previous single-letter token */
505*2b54f0dbSXin Li *previousState.context_upper_letter = ' ';
506*2b54f0dbSXin Li /* Move the current token one position to the left */
507*2b54f0dbSXin Li move_token(token_start, token_end, token_start - 1);
508*2b54f0dbSXin Li token_start -= 1;
509*2b54f0dbSXin Li /*
510*2b54f0dbSXin Li * Add the letter on the end
511*2b54f0dbSXin Li * Note: accessing token_start[-1] is safe because this is not the first token
512*2b54f0dbSXin Li */
513*2b54f0dbSXin Li token_end[-1] = letter;
514*2b54f0dbSXin Li }
515*2b54f0dbSXin Li }
516*2b54f0dbSXin Li }
517*2b54f0dbSXin Li if (state->frequency_separator != NULL) {
518*2b54f0dbSXin Li if (is_model_number(token_start, token_end)) {
519*2b54f0dbSXin Li state->parsed_model_number = true;
520*2b54f0dbSXin Li }
521*2b54f0dbSXin Li }
522*2b54f0dbSXin Li if (is_frequency(token_start, token_end)) {
523*2b54f0dbSXin Li state->frequency_token = true;
524*2b54f0dbSXin Li }
525*2b54f0dbSXin Li return true;
526*2b54f0dbSXin Li }
527*2b54f0dbSXin Li
cpuinfo_x86_normalize_brand_string(const char raw_name[48],char normalized_name[48])528*2b54f0dbSXin Li uint32_t cpuinfo_x86_normalize_brand_string(
529*2b54f0dbSXin Li const char raw_name[48],
530*2b54f0dbSXin Li char normalized_name[48])
531*2b54f0dbSXin Li {
532*2b54f0dbSXin Li normalized_name[0] = '\0';
533*2b54f0dbSXin Li char name[48];
534*2b54f0dbSXin Li memcpy(name, raw_name, sizeof(name));
535*2b54f0dbSXin Li
536*2b54f0dbSXin Li /*
537*2b54f0dbSXin Li * First find the end of the string
538*2b54f0dbSXin Li * Start search from the end because some brand strings contain zeroes in the middle
539*2b54f0dbSXin Li */
540*2b54f0dbSXin Li char* name_end = &name[48];
541*2b54f0dbSXin Li while (name_end[-1] == '\0') {
542*2b54f0dbSXin Li /*
543*2b54f0dbSXin Li * Adject name_end by 1 position and check that we didn't reach the start of the brand string.
544*2b54f0dbSXin Li * This is possible if all characters are zero.
545*2b54f0dbSXin Li */
546*2b54f0dbSXin Li if (--name_end == name) {
547*2b54f0dbSXin Li /* All characters are zeros */
548*2b54f0dbSXin Li return 0;
549*2b54f0dbSXin Li }
550*2b54f0dbSXin Li }
551*2b54f0dbSXin Li
552*2b54f0dbSXin Li struct parser_state parser_state = { 0 };
553*2b54f0dbSXin Li
554*2b54f0dbSXin Li /* Now unify all whitespace characters: replace tabs and '\0' with spaces */
555*2b54f0dbSXin Li {
556*2b54f0dbSXin Li bool inside_parentheses = false;
557*2b54f0dbSXin Li for (char* char_ptr = name; char_ptr != name_end; char_ptr++) {
558*2b54f0dbSXin Li switch (*char_ptr) {
559*2b54f0dbSXin Li case '(':
560*2b54f0dbSXin Li inside_parentheses = true;
561*2b54f0dbSXin Li *char_ptr = ' ';
562*2b54f0dbSXin Li break;
563*2b54f0dbSXin Li case ')':
564*2b54f0dbSXin Li inside_parentheses = false;
565*2b54f0dbSXin Li *char_ptr = ' ';
566*2b54f0dbSXin Li break;
567*2b54f0dbSXin Li case '@':
568*2b54f0dbSXin Li parser_state.frequency_separator = char_ptr;
569*2b54f0dbSXin Li case '\0':
570*2b54f0dbSXin Li case '\t':
571*2b54f0dbSXin Li *char_ptr = ' ';
572*2b54f0dbSXin Li break;
573*2b54f0dbSXin Li default:
574*2b54f0dbSXin Li if (inside_parentheses) {
575*2b54f0dbSXin Li *char_ptr = ' ';
576*2b54f0dbSXin Li }
577*2b54f0dbSXin Li }
578*2b54f0dbSXin Li }
579*2b54f0dbSXin Li }
580*2b54f0dbSXin Li
581*2b54f0dbSXin Li /* Iterate through all tokens and erase redundant parts */
582*2b54f0dbSXin Li {
583*2b54f0dbSXin Li bool is_token = false;
584*2b54f0dbSXin Li char* token_start;
585*2b54f0dbSXin Li for (char* char_ptr = name; char_ptr != name_end; char_ptr++) {
586*2b54f0dbSXin Li if (*char_ptr == ' ') {
587*2b54f0dbSXin Li if (is_token) {
588*2b54f0dbSXin Li is_token = false;
589*2b54f0dbSXin Li if (!transform_token(token_start, char_ptr, &parser_state)) {
590*2b54f0dbSXin Li name_end = char_ptr;
591*2b54f0dbSXin Li break;
592*2b54f0dbSXin Li }
593*2b54f0dbSXin Li }
594*2b54f0dbSXin Li } else {
595*2b54f0dbSXin Li if (!is_token) {
596*2b54f0dbSXin Li is_token = true;
597*2b54f0dbSXin Li token_start = char_ptr;
598*2b54f0dbSXin Li }
599*2b54f0dbSXin Li }
600*2b54f0dbSXin Li }
601*2b54f0dbSXin Li if (is_token) {
602*2b54f0dbSXin Li transform_token(token_start, name_end, &parser_state);
603*2b54f0dbSXin Li }
604*2b54f0dbSXin Li }
605*2b54f0dbSXin Li
606*2b54f0dbSXin Li /* If this is an engineering sample, return empty string */
607*2b54f0dbSXin Li if (parser_state.engineering_sample) {
608*2b54f0dbSXin Li return 0;
609*2b54f0dbSXin Li }
610*2b54f0dbSXin Li
611*2b54f0dbSXin Li /* Check if there is some string before the frequency separator. */
612*2b54f0dbSXin Li if (parser_state.frequency_separator != NULL) {
613*2b54f0dbSXin Li if (is_space(name, parser_state.frequency_separator)) {
614*2b54f0dbSXin Li /* If only frequency is available, return empty string */
615*2b54f0dbSXin Li return 0;
616*2b54f0dbSXin Li }
617*2b54f0dbSXin Li }
618*2b54f0dbSXin Li
619*2b54f0dbSXin Li /* Compact tokens: collapse multiple spacing into one */
620*2b54f0dbSXin Li {
621*2b54f0dbSXin Li char* output_ptr = normalized_name;
622*2b54f0dbSXin Li char* token_start;
623*2b54f0dbSXin Li bool is_token = false;
624*2b54f0dbSXin Li bool previous_token_ends_with_dash = true;
625*2b54f0dbSXin Li bool current_token_starts_with_dash = false;
626*2b54f0dbSXin Li uint32_t token_count = 1;
627*2b54f0dbSXin Li for (char* char_ptr = name; char_ptr != name_end; char_ptr++) {
628*2b54f0dbSXin Li const char character = *char_ptr;
629*2b54f0dbSXin Li if (character == ' ') {
630*2b54f0dbSXin Li if (is_token) {
631*2b54f0dbSXin Li is_token = false;
632*2b54f0dbSXin Li if (!current_token_starts_with_dash && !previous_token_ends_with_dash) {
633*2b54f0dbSXin Li token_count += 1;
634*2b54f0dbSXin Li *output_ptr++ = ' ';
635*2b54f0dbSXin Li }
636*2b54f0dbSXin Li output_ptr = move_token(token_start, char_ptr, output_ptr);
637*2b54f0dbSXin Li /* Note: char_ptr[-1] exists because there is a token before this space */
638*2b54f0dbSXin Li previous_token_ends_with_dash = (char_ptr[-1] == '-');
639*2b54f0dbSXin Li }
640*2b54f0dbSXin Li } else {
641*2b54f0dbSXin Li if (!is_token) {
642*2b54f0dbSXin Li is_token = true;
643*2b54f0dbSXin Li token_start = char_ptr;
644*2b54f0dbSXin Li current_token_starts_with_dash = (character == '-');
645*2b54f0dbSXin Li }
646*2b54f0dbSXin Li }
647*2b54f0dbSXin Li }
648*2b54f0dbSXin Li if (is_token) {
649*2b54f0dbSXin Li if (!current_token_starts_with_dash && !previous_token_ends_with_dash) {
650*2b54f0dbSXin Li token_count += 1;
651*2b54f0dbSXin Li *output_ptr++ = ' ';
652*2b54f0dbSXin Li }
653*2b54f0dbSXin Li output_ptr = move_token(token_start, name_end, output_ptr);
654*2b54f0dbSXin Li }
655*2b54f0dbSXin Li if (parser_state.frequency_token && token_count <= 1) {
656*2b54f0dbSXin Li /* The only remaining part is frequency */
657*2b54f0dbSXin Li normalized_name[0] = '\0';
658*2b54f0dbSXin Li return 0;
659*2b54f0dbSXin Li }
660*2b54f0dbSXin Li if (output_ptr < &normalized_name[48]) {
661*2b54f0dbSXin Li *output_ptr = '\0';
662*2b54f0dbSXin Li } else {
663*2b54f0dbSXin Li normalized_name[47] = '\0';
664*2b54f0dbSXin Li }
665*2b54f0dbSXin Li return (uint32_t) (output_ptr - normalized_name);
666*2b54f0dbSXin Li }
667*2b54f0dbSXin Li }
668*2b54f0dbSXin Li
669*2b54f0dbSXin Li static const char* vendor_string_map[] = {
670*2b54f0dbSXin Li [cpuinfo_vendor_intel] = "Intel",
671*2b54f0dbSXin Li [cpuinfo_vendor_amd] = "AMD",
672*2b54f0dbSXin Li [cpuinfo_vendor_via] = "VIA",
673*2b54f0dbSXin Li [cpuinfo_vendor_hygon] = "Hygon",
674*2b54f0dbSXin Li [cpuinfo_vendor_rdc] = "RDC",
675*2b54f0dbSXin Li [cpuinfo_vendor_dmp] = "DM&P",
676*2b54f0dbSXin Li [cpuinfo_vendor_transmeta] = "Transmeta",
677*2b54f0dbSXin Li [cpuinfo_vendor_cyrix] = "Cyrix",
678*2b54f0dbSXin Li [cpuinfo_vendor_rise] = "Rise",
679*2b54f0dbSXin Li [cpuinfo_vendor_nsc] = "NSC",
680*2b54f0dbSXin Li [cpuinfo_vendor_sis] = "SiS",
681*2b54f0dbSXin Li [cpuinfo_vendor_nexgen] = "NexGen",
682*2b54f0dbSXin Li [cpuinfo_vendor_umc] = "UMC",
683*2b54f0dbSXin Li };
684*2b54f0dbSXin Li
cpuinfo_x86_format_package_name(enum cpuinfo_vendor vendor,const char normalized_brand_string[48],char package_name[CPUINFO_PACKAGE_NAME_MAX])685*2b54f0dbSXin Li uint32_t cpuinfo_x86_format_package_name(
686*2b54f0dbSXin Li enum cpuinfo_vendor vendor,
687*2b54f0dbSXin Li const char normalized_brand_string[48],
688*2b54f0dbSXin Li char package_name[CPUINFO_PACKAGE_NAME_MAX])
689*2b54f0dbSXin Li {
690*2b54f0dbSXin Li if (normalized_brand_string[0] == '\0') {
691*2b54f0dbSXin Li package_name[0] = '\0';
692*2b54f0dbSXin Li return 0;
693*2b54f0dbSXin Li }
694*2b54f0dbSXin Li
695*2b54f0dbSXin Li const char* vendor_string = NULL;
696*2b54f0dbSXin Li if ((uint32_t) vendor < (uint32_t) CPUINFO_COUNT_OF(vendor_string_map)) {
697*2b54f0dbSXin Li vendor_string = vendor_string_map[(uint32_t) vendor];
698*2b54f0dbSXin Li }
699*2b54f0dbSXin Li if (vendor_string == NULL) {
700*2b54f0dbSXin Li strncpy(package_name, normalized_brand_string, CPUINFO_PACKAGE_NAME_MAX);
701*2b54f0dbSXin Li package_name[CPUINFO_PACKAGE_NAME_MAX - 1] = '\0';
702*2b54f0dbSXin Li return 0;
703*2b54f0dbSXin Li } else {
704*2b54f0dbSXin Li snprintf(package_name, CPUINFO_PACKAGE_NAME_MAX,
705*2b54f0dbSXin Li "%s %s", vendor_string, normalized_brand_string);
706*2b54f0dbSXin Li return (uint32_t) strlen(vendor_string) + 1;
707*2b54f0dbSXin Li }
708*2b54f0dbSXin Li }
709