1 // Copyright © 2024 Igalia S.L.
2 // SPDX-License-Identifier: MIT
3
4 use crate::isa::{BitSetEnumValue, ISA};
5
6 use proc_macro::TokenStream;
7 use proc_macro2::TokenStream as TokenStream2;
8 use quote::quote;
9 use quote::ToTokens;
10 use roxmltree::Document;
11 use std::fs;
12 use std::path::Path;
13 use syn::{parse_macro_input, parse_quote, Attribute, DeriveInput, Expr, ExprLit, Lit, Meta};
14
15 mod isa;
16
17 /// Parses the derive input to extract file paths from attributes
18 ///
19 /// # Returns
20 /// A tuple containing the paths to the ISA and static rules files
21 ///
22 /// # Panics
23 /// Panics if the necessary attributes are not found or are in the wrong format
parse_derive(ast: &DeriveInput) -> (String, String)24 pub(crate) fn parse_derive(ast: &DeriveInput) -> (String, String) {
25 // Collect attributes with the name "isa"
26 let isa_attr = ast
27 .attrs
28 .iter()
29 .find(|attr| {
30 let path = attr.meta.path();
31 path.is_ident("isa")
32 })
33 .expect("An ISA file needs to be provided with the #[isa = \"PATH\"] attribute");
34
35 // Get the path from the "isa" attribute
36 let isa_path = get_attribute(isa_attr);
37
38 // Collect attributes with the name "static_rules_file"
39 let static_rules_attr = ast
40 .attrs
41 .iter()
42 .find(|attr| {
43 let path = attr.meta.path();
44 path.is_ident("static_rules_file")
45 })
46 .expect("A static pest rules file needs to be provided with the #[static_rules_file = \"PATH\"] attribute");
47
48 // Get the path from the "static_rules_file" attribute
49 let static_rules_path = get_attribute(static_rules_attr);
50
51 (isa_path, static_rules_path)
52 }
53
54 /// Extracts the string value from a name-value attribute
55 ///
56 /// # Panics
57 /// Panics if the attribute is not in the expected format
get_attribute(attr: &Attribute) -> String58 fn get_attribute(attr: &Attribute) -> String {
59 match &attr.meta {
60 Meta::NameValue(name_value) => match &name_value.value {
61 Expr::Lit(ExprLit {
62 lit: Lit::Str(string),
63 ..
64 }) => {
65 if name_value.path.is_ident("isa") || name_value.path.is_ident("static_rules_file")
66 {
67 string.value()
68 } else {
69 panic!("Attribute must be a file path")
70 }
71 }
72 _ => panic!("Attribute must be a string"),
73 },
74 _ => panic!("Attribute must be of the form `key = \"...\"`"),
75 }
76 }
77
/// Builds the identifier string `<enum_name>_<enum_value>` in ASCII uppercase.
///
/// The characters `.`, `[` and `]` are dropped from `enum_value` only;
/// `enum_name` is kept as-is apart from the uppercasing.
fn format_enum_value_str(enum_name: &str, enum_value: &str) -> String {
    let mut formatted = String::with_capacity(enum_name.len() + 1 + enum_value.len());
    formatted.push_str(enum_name);
    formatted.push('_');
    formatted.extend(
        enum_value
            .chars()
            .filter(|&c| !matches!(c, '.' | '[' | ']')),
    );
    formatted.make_ascii_uppercase();
    formatted
}
82
83 /// Retrieves and formats the enum value string from a `BitSetEnumValue`
get_enum_value_str(enum_name: &str, enum_value: &BitSetEnumValue) -> String84 fn get_enum_value_str(enum_name: &str, enum_value: &BitSetEnumValue) -> String {
85 format_enum_value_str(enum_name, enum_value.name.unwrap_or(enum_value.display))
86 }
87
88 /// Generates the implementation of `FromPestRule` for enums in the ISA
generate_from_rule_impl_enums(isa: &ISA) -> TokenStream289 fn generate_from_rule_impl_enums(isa: &ISA) -> TokenStream2 {
90 isa.enums
91 .values()
92 .map(|e| {
93 let enum_name_str = format!("isa_{}", e.name.trim_start_matches('#'));
94
95 let enum_name = syn::Ident::new(&enum_name_str, proc_macro2::Span::call_site());
96 let match_arms: Vec<_> = e
97 .values
98 .iter()
99 .filter(|v| !v.display.is_empty() && v.display != ".____")
100 .map(|v| {
101 let variant_name = syn::Ident::new(
102 get_enum_value_str(&enum_name_str, v).as_str(),
103 proc_macro2::Span::call_site(),
104 );
105 let rule_name = syn::Ident::new(
106 &to_upper_camel_case(v.name.unwrap_or(v.display), false),
107 proc_macro2::Span::call_site(),
108 );
109 quote! { Rule::#rule_name => #enum_name::#variant_name }
110 })
111 .collect();
112
113 quote! {
114 impl FromPestRule for #enum_name {
115 fn from_rule(rule: Rule) -> Self where Self: Sized {
116 match rule {
117 #(#match_arms),*,
118 _ => panic!("Unexpected rule: {:?}", rule),
119 }
120 }
121 }
122 }
123 })
124 .collect()
125 }
126
127 /// Generates the implementation of `FromPestRule` for ISA opcodes
generate_from_rule_impl_opc(isa: &ISA) -> TokenStream2128 fn generate_from_rule_impl_opc(isa: &ISA) -> TokenStream2 {
129 let instr_name = syn::Ident::new("isa_opc", proc_macro2::Span::call_site());
130
131 let match_arms: Vec<_> = isa
132 .bitsets
133 .values()
134 .filter(|bitset| !bitset.name.starts_with('#'))
135 .map(|instr| {
136 let variant_name = syn::Ident::new(
137 format_enum_value_str("isa_opc", instr.name).as_str(),
138 proc_macro2::Span::call_site(),
139 );
140
141 let pest_rule = format!("Opc_{}", instr.name);
142
143 let rule_name = syn::Ident::new(
144 &to_upper_camel_case(pest_rule.as_str(), true),
145 proc_macro2::Span::call_site(),
146 );
147 quote! { Rule::#rule_name => #instr_name::#variant_name }
148 })
149 .collect();
150
151 quote! {
152 impl FromPestRule for isa_opc {
153 fn from_rule(rule: Rule) -> Self where Self: Sized {
154 match rule {
155 #(#match_arms),*,
156 _ => panic!("Unexpected rule: {:?}", rule),
157 }
158 }
159 }
160 }
161 }
162
163 /// Main derive function to generate the parser
derive_parser(input: TokenStream) -> TokenStream164 fn derive_parser(input: TokenStream) -> TokenStream {
165 let mut ast: DeriveInput = parse_macro_input!(input as DeriveInput);
166 let root = "../src/etnaviv/isa/";
167 let (isa_filename, static_rules_filename) = parse_derive(&ast);
168 let isa_path = Path::new(&root).join(isa_filename);
169 let static_rules_path = Path::new(&root).join(static_rules_filename);
170
171 // Load the XML document
172 let xml_content = fs::read_to_string(isa_path).expect("Failed to read XML file");
173 let doc = Document::parse(&xml_content).expect("Failed to parse XML");
174 let isa = ISA::new(&doc);
175
176 // Load the static rules
177 let mut grammar =
178 fs::read_to_string(static_rules_path).expect("Failed to read static rules pest file");
179
180 // Append generated grammar rules
181 grammar.push_str(&generate_peg_grammar(&isa));
182
183 // Add grammar as an attribute to the AST
184 ast.attrs.push(parse_quote! {
185 #[grammar_inline = #grammar]
186 });
187
188 // Generate the token streams for the parser, trait, and rule implementations
189 let tokens_parser = pest_generator::derive_parser(ast.to_token_stream(), false);
190 let tokens_from_rule_enums = generate_from_rule_impl_enums(&isa);
191 let tokens_from_rule_opc = generate_from_rule_impl_opc(&isa);
192
193 // Combine all token streams into one
194 let tokens = quote! {
195 #tokens_parser
196
197 pub trait FromPestRule {
198 fn from_rule(rule: Rule) -> Self where Self: Sized;
199 }
200
201 #tokens_from_rule_enums
202 #tokens_from_rule_opc
203 };
204
205 tokens.into()
206 }
207
208 /// Generates PEG grammar rules for enums
generate_peg_grammar_enums(isa: &ISA) -> String209 fn generate_peg_grammar_enums(isa: &ISA) -> String {
210 let mut grammar = String::new();
211
212 for e in isa.enums.values() {
213 let mut values: Vec<_> = e
214 .values
215 .iter()
216 .filter(|v| !v.display.is_empty() && v.display != ".____")
217 .collect();
218
219 // From the pest docs:
220 // The choice operator, written as a vertical line |, is ordered. The PEG
221 // expression first | second means "try first; but if it fails, try second instead".
222 //
223 // We need to sort our enum to be able to parse eg th1.xxxx and t1.xxxx
224 values.sort_by(|a, b| b.display.cmp(a.display));
225
226 let rule_name = to_upper_camel_case(e.name.trim_start_matches('#'), true);
227
228 let value_names: Vec<_> = values
229 .iter()
230 .map(|enum_value| {
231 to_upper_camel_case(enum_value.name.unwrap_or(enum_value.display), false)
232 })
233 .collect();
234
235 grammar.push_str(&format!(
236 "{} = {{ {} }}\n",
237 rule_name,
238 value_names.join(" | ")
239 ));
240
241 for value in &values {
242 let variant_name = to_upper_camel_case(value.name.unwrap_or(value.display), false);
243 grammar.push_str(&format!(
244 " {} = {{ \"{}\" }}\n",
245 variant_name, value.display
246 ));
247 }
248
249 grammar.push('\n')
250 }
251
252 grammar
253 }
254
255 /// Generates PEG grammar rules for instructions
generate_peg_grammar_instructions(isa: &ISA) -> String256 fn generate_peg_grammar_instructions(isa: &ISA) -> String {
257 let mut grammar = String::new();
258
259 // Collect instructions that do not start with "#"
260 let instructions: Vec<_> = isa
261 .bitsets
262 .values()
263 .filter(|bitset| !bitset.name.starts_with('#'))
264 .collect();
265
266 // Generate instruction names
267 let instruction_names: Vec<_> = instructions
268 .iter()
269 .map(|instruction| format!("Opc{}", to_upper_camel_case(instruction.name, true)))
270 .collect();
271
272 // Join instruction names and append to grammar
273 grammar.push_str(&format!(
274 "instruction = _{{ {} }}\n",
275 instruction_names.join(" | ")
276 ));
277
278 for (instruction, opcode) in std::iter::zip(instructions, instruction_names) {
279 let meta = isa.collect_meta(instruction.name);
280 let type_ = meta.get("type").copied().unwrap_or("");
281
282 // Prepare rule parts
283 let mut rule_parts = Vec::new();
284 rule_parts.push(format!(
285 "\"{}\"",
286 instruction.displayname.unwrap_or(instruction.name)
287 ));
288
289 let template_key = format!("INSTR_{}", type_.to_ascii_uppercase());
290 let flags = isa
291 .templates
292 .get(template_key.as_str())
293 .map_or("", |template| template.display.trim());
294
295 // Process flags
296 // Convert the XML string to a vec and filter out not wanted NAME.
297 // e.g.: {NAME}{DST_FULL}{SAT}{COND}{SKPHP}{TYPE}{PMODE}{THREAD}{RMODE} to
298 // ["Dst_full", "Sat", "Cond", "Skphp", "Type", "Pmode", "Thread", "Rounding"]
299 flags
300 .split(&['{', '}'])
301 .filter(|part| !part.trim().is_empty() && *part != "NAME")
302 .for_each(|part| {
303 let part = if part == "RMODE" { "Rounding" } else { part };
304 rule_parts.push(format!("{}?", to_upper_camel_case(part, false)));
305 });
306
307 let has_dest = meta
308 .get("has_dest")
309 .map(|b| b.parse::<bool>())
310 .unwrap_or(Ok(false))
311 .expect("has_dest must be a bool value (true|false)");
312
313 let rule_part = match (has_dest, type_) {
314 (true, "load_store") => "(Dest | DstMemAddr) ~ \",\"",
315 (true, _) => "Dest ~ \",\"",
316 (false, _) => "DestVoid ~ \",\"",
317 };
318
319 rule_parts.push(rule_part.to_string());
320
321 if type_ == "tex" {
322 rule_parts.push("TexSrc ~ \",\"".to_string());
323 }
324
325 let possible_srcs = if type_ == "cf" { 2 } else { 3 };
326 let valid_srcs: Vec<_> = meta
327 .get("valid_srcs")
328 .unwrap_or(&"")
329 .split('|')
330 .filter_map(|s| s.parse::<usize>().ok())
331 .collect();
332
333 for i in 0..possible_srcs {
334 if valid_srcs.contains(&i) {
335 rule_parts.push("Src".to_string());
336 } else {
337 rule_parts.push("SrcVoid".to_string());
338 }
339 if i + 1 < possible_srcs {
340 rule_parts.push("\",\"".to_string());
341 }
342 }
343
344 if type_ == "cf" {
345 rule_parts.push("\",\"".to_string());
346 rule_parts.push("Target".to_string());
347 }
348
349 grammar.push_str(&format!(
350 " {} = {{ {} }}\n",
351 opcode,
352 rule_parts.join(" ~ ")
353 ));
354 }
355
356 grammar
357 }
358
359 /// Combines the PEG grammar rules for enums and instructions
generate_peg_grammar(isa: &ISA) -> String360 fn generate_peg_grammar(isa: &ISA) -> String {
361 let mut grammar = String::new();
362
363 grammar.push_str(&generate_peg_grammar_enums(isa));
364 grammar.push_str(&generate_peg_grammar_instructions(isa));
365 grammar.push_str("instructions = _{ SOI ~ (instruction ~ NEWLINE?)* ~ EOI }");
366
367 grammar
368 }
369
/// Converts a string to UpperCamelCase.
///
/// The characters `.`, `[` and `]` are dropped. The remaining text is split
/// into words at whitespace; the first character of each word is uppercased,
/// the rest lowercased, and the words are joined without a separator.
///
/// # Arguments
/// * `s` - The input string
/// * `replace_underscores` - When `true`, underscores separate words just
///   like whitespace; when `false`, they are kept as ordinary characters
fn to_upper_camel_case(s: &str, replace_underscores: bool) -> String {
    let mut out = String::with_capacity(s.len());
    let mut at_word_start = true;

    for ch in s.chars() {
        // Dropped characters neither end nor start a word.
        if matches!(ch, '.' | '[' | ']') {
            continue;
        }

        if ch.is_whitespace() || (replace_underscores && ch == '_') {
            at_word_start = true;
            continue;
        }

        if at_word_start {
            out.extend(ch.to_uppercase());
            at_word_start = false;
        } else {
            out.extend(ch.to_lowercase());
        }
    }

    out
}
398
/// Procedural macro to derive the ISA parser
///
/// Exposed as `#[derive(IsaParser)]`, with `isa` and `static_rules_file`
/// registered as helper attributes. All of the work is delegated to
/// `derive_parser`.
#[proc_macro_derive(IsaParser, attributes(isa, static_rules_file))]
pub fn derive_isaspec_parser(input: TokenStream) -> TokenStream {
    derive_parser(input)
}
404
/// Unit tests for attribute parsing, name conversion and grammar generation.
#[cfg(test)]
mod lib {
    use super::*;
    use crate::isa::{BitSetEnum, BitSetEnumValue, Bitset, BitsetTemplate, ISA};
    use indexmap::IndexMap;
    use std::collections::HashMap;

    #[test]
    fn derive_ok() {
        let definition = "
            #[other_attr]
            #[isa = \"myfile.isa\"]
            #[static_rules_file = \"static_rules.pest\"]
            pub struct MyParser<'a, T>;
        ";
        let ast = syn::parse_str(definition).unwrap();
        let (isa, static_rules) = parse_derive(&ast);
        assert_eq!(isa, "myfile.isa");
        assert_eq!(static_rules, "static_rules.pest");
    }

    #[test]
    #[should_panic(expected = "Attribute must be a string")]
    fn derive_wrong_arg_isa() {
        let definition = "
            #[other_attr]
            #[isa = 1]
            #[static_rules_file = \"static_rules.pest\"]
            pub struct MyParser<'a, T>;
        ";
        let ast = syn::parse_str(definition).unwrap();
        parse_derive(&ast);
    }

    #[test]
    #[should_panic(expected = "Attribute must be a string")]
    fn derive_wrong_arg_static_rules_file() {
        let definition = "
            #[other_attr]
            #[isa = \"test.xml\"]
            #[static_rules_file = 1]
            pub struct MyParser<'a, T>;
        ";
        let ast = syn::parse_str(definition).unwrap();
        parse_derive(&ast);
    }

    #[test]
    #[should_panic(
        expected = "An ISA file needs to be provided with the #[isa = \"PATH\"] attribute"
    )]
    fn derive_no_isa() {
        let definition = "
            #[other_attr]
            pub struct MyParser<'a, T>;
        ";
        let ast = syn::parse_str(definition).unwrap();
        parse_derive(&ast);
    }

    #[test]
    #[should_panic(
        expected = "A static pest rules file needs to be provided with the #[static_rules_file = \"PATH\"] attribute"
    )]
    fn derive_no_static_rules_file() {
        let definition = "
            #[other_attr]
            #[isa = \"test.xml\"]
            pub struct MyParser<'a, T>;
        ";
        let ast = syn::parse_str(definition).unwrap();
        parse_derive(&ast);
    }

    #[test]
    fn test_to_upper_camel_case() {
        assert_eq!(to_upper_camel_case("test_string", true), "TestString");
        assert_eq!(to_upper_camel_case("test_string", false), "Test_string");
        assert_eq!(to_upper_camel_case("[Test]_String", true), "TestString");
        assert_eq!(to_upper_camel_case("[Test]_String", false), "Test_string");
        assert_eq!(
            to_upper_camel_case("multiple_words_string", true),
            "MultipleWordsString"
        );
    }

    /// Builds a minimal ISA with one ALU instruction, one enum with two named
    /// values and the `INSTR_ALU` display template.
    fn mock_isa() -> ISA<'static> {
        let mut bitsets = IndexMap::new();
        let mut enums = IndexMap::new();
        let mut templates = IndexMap::new();

        bitsets.insert(
            "bitset1",
            Bitset {
                name: "bitset1",
                // Read by generate_peg_grammar_instructions; `None` makes the
                // mnemonic fall back to `name`.
                displayname: None,
                extends: None,
                meta: HashMap::from([("type", "alu"), ("has_dest", "true"), ("valid_srcs", "0")]),
            },
        );

        enums.insert(
            "enum1",
            BitSetEnum {
                name: "enum1",
                values: vec![
                    BitSetEnumValue {
                        display: "val1",
                        name: Some("val1_name"),
                    },
                    BitSetEnumValue {
                        display: "val2",
                        name: Some("val2_name"),
                    },
                ],
            },
        );

        templates.insert(
            "INSTR_ALU",
            BitsetTemplate {
                display: "{DST_FULL}{SAT}{COND}",
            },
        );

        ISA {
            bitsets,
            enums,
            templates,
        }
    }

    #[test]
    fn test_generate_peg_grammar_enums() {
        let isa = mock_isa();
        let grammar = generate_peg_grammar_enums(&isa);
        // Rule names come from `name` when it is set, and the alternatives are
        // ordered by descending display string.
        assert!(grammar.contains("Enum1 = { Val2_name | Val1_name }"));
        assert!(grammar.contains("Val1_name = { \"val1\" }"));
        assert!(grammar.contains("Val2_name = { \"val2\" }"));
    }

    #[test]
    fn test_generate_peg_grammar_instructions() {
        let isa = mock_isa();
        let grammar = generate_peg_grammar_instructions(&isa);
        // The per-instruction choice rule is `instruction` (singular); the
        // plural `instructions` rule is only added by generate_peg_grammar.
        assert!(grammar.contains("instruction = _{ OpcBitset1 }"));
        assert!(grammar.contains("OpcBitset1 = { \"bitset1\" ~ Dst_full? ~ Sat? ~ Cond? ~ Dest ~ \",\" ~ Src ~ \",\" ~ SrcVoid ~ \",\" ~ SrcVoid }"));
    }

    #[test]
    fn test_generate_peg_grammar() {
        let isa = mock_isa();
        let grammar = generate_peg_grammar(&isa);
        assert!(grammar.contains("Enum1 = { Val2_name | Val1_name }"));
        assert!(grammar.contains("instruction = _{ OpcBitset1 }"));
        assert!(grammar.contains("OpcBitset1 = { \"bitset1\" ~ Dst_full? ~ Sat? ~ Cond? ~ Dest ~ \",\" ~ Src ~ \",\" ~ SrcVoid ~ \",\" ~ SrcVoid }"));
        assert!(grammar.ends_with("instructions = _{ SOI ~ (instruction ~ NEWLINE?)* ~ EOI }"));
    }
}
552