// Copyright 2023 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

// This proc macro crate is a private API for the `pw_tokenizer` crate.
#![doc(hidden)]

use std::ffi::CString;

use proc_macro::TokenStream;
use proc_macro2::Ident;
use quote::{format_ident, quote, ToTokens};
use syn::{
    parse::{Parse, ParseStream},
    parse_macro_input, Expr, LitStr, Token, Type,
};

use pw_format::macros::{
    generate_printf, Arg, CoreFmtFormatStringParser, FormatAndArgsFlavor, FormatStringParser,
    PrintfFormatMacroGenerator, PrintfFormatStringFragment, PrintfFormatStringParser, Result,
};
use pw_tokenizer_core::TOKENIZER_ENTRY_MAGIC;

type TokenStream2 = proc_macro2::TokenStream;

// Handles tokenizing (hashing) `fragments` and adding them to the token
// database with the specified `domain`.  A detailed description of what
// happens here is in the docs for the [`pw_tokenizer::token`] macro.
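//
// For orientation (hedged sketch, not generated verbatim): given a single
// fragment `"hello"` and an empty domain, the emitted block places a
// `TokenEntry` static in the `.pw_tokenizer.entries.rust` linker section and
// evaluates to the string's u32 hash, so callers can treat the whole
// expansion as a `u32` expression.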
fn token_backend(domain: &str, fragments: &[TokenStream2]) -> TokenStream2 {
    let ident = format_ident!("_PW_TOKENIZER_STRING_ENTRY_RUST");

    // pw_tokenizer is intended for use with ELF files only. Mach-O files (macOS
    // executables) do not support section names longer than 16 characters, so a
    // short, unused section name is used on macOS.
    let section = if cfg!(target_os = "macos") {
        ",pw,".to_string()
    } else {
        ".pw_tokenizer.entries.rust".to_string()
    };

    let domain = CString::new(domain).unwrap();
    let domain_bytes = domain.as_bytes_with_nul();
    let domain_bytes_len = domain_bytes.len();

    quote! {
        // Use an inner scope to avoid identifier collision.  Name mangling
        // will disambiguate these in the symbol table.
        {
            const STRING: &str = __pw_tokenizer_crate::concat_static_strs!(#(#fragments),*);
            const STRING_BYTES: &[u8] = STRING.as_bytes();
            const STRING_LEN: usize = STRING_BYTES.len();

            const HASH: u32 = __pw_tokenizer_crate::hash_string(STRING);

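            // Layout note (assumption, restating the field order below): this
            // packed struct is the token database entry record (magic word,
            // token, domain size, string length, then the domain and string
            // bytes plus a null terminator) that pw_tokenizer's offline
            // tooling scans for in the ELF.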
            #[repr(C, packed(1))]
            struct TokenEntry {
                magic: u32,
                token: u32,
                domain_size: u32,
                string_length: u32,
                domain: [u8; #domain_bytes_len],
                string: [u8; STRING_LEN],
                null_terminator: u8,
            };
            // This is currently manually verified to be correct.
            // TODO: b/287132907 - Add integration tests for token database.
            #[link_section = #section ]
            #[used]
            static #ident: TokenEntry = TokenEntry {
                magic: #TOKENIZER_ENTRY_MAGIC,
                token: HASH,
                domain_size: #domain_bytes_len as u32,
                string_length: (STRING_LEN + 1) as u32,
                domain: [ #(#domain_bytes),* ],
                // Safety: `STRING_LEN` is declared as the length of `STRING_BYTES` above.
                string: unsafe { *::core::mem::transmute::<_, *const [u8; STRING_LEN]>(STRING_BYTES.as_ptr()) },
                null_terminator: 0u8,
            };

            HASH
        }
    }
}

// Documented in `pw_tokenizer::token`.
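//
// Hedged usage sketch of the public wrapper (see the `pw_tokenizer` docs for
// the supported API):
//
//   let token: u32 = pw_tokenizer::token!("hello");
//
// The expansion registers "hello" in the token database section and
// evaluates to its u32 token.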
#[proc_macro]
pub fn _token(tokens: TokenStream) -> TokenStream {
    let input = parse_macro_input!(tokens as LitStr);
    token_backend("", &[input.into_token_stream()]).into()
}

// Args to tokenize to buffer that are parsed according to the pattern:
//   ($buffer:expr, $format_string:literal, $($args:expr),*)
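//
// e.g. (illustrative): `(&mut buf, "temp: %d", temp)` as passed through by
// the `tokenize_*_to_buffer!` wrappers.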
#[derive(Debug)]
struct TokenizeToBufferArgs<T: FormatStringParser + core::fmt::Debug> {
    buffer: Expr,
    format_and_args: FormatAndArgsFlavor<T>,
}

impl<T: FormatStringParser + core::fmt::Debug> Parse for TokenizeToBufferArgs<T> {
    fn parse(input: ParseStream) -> syn::parse::Result<Self> {
        let buffer: Expr = input.parse()?;
        input.parse::<Token![,]>()?;
        let format_and_args: FormatAndArgsFlavor<_> = input.parse()?;

        Ok(TokenizeToBufferArgs {
            buffer,
            format_and_args,
        })
    }
}

// A PrintfFormatMacroGenerator that provides the code generation backend for
// the `tokenize_to_buffer!` macro.
struct TokenizeToBufferGenerator<'a> {
    domain: &'a str,
    buffer: &'a Expr,
    encoding_fragments: Vec<TokenStream2>,
}

impl<'a> TokenizeToBufferGenerator<'a> {
    fn new(domain: &'a str, buffer: &'a Expr) -> Self {
        Self {
            domain,
            buffer,
            encoding_fragments: Vec::new(),
        }
    }
}

impl<'a> PrintfFormatMacroGenerator for TokenizeToBufferGenerator<'a> {
    fn finalize(
        self,
        format_string_fragments: &[PrintfFormatStringFragment],
    ) -> Result<TokenStream2> {
        // Locally scoped aliases so we can refer to them in `quote!()`.
        let buffer = self.buffer;
        let encoding_fragments = self.encoding_fragments;

        let format_string_pieces: Vec<_> = format_string_fragments
            .iter()
            .map(|fragment| fragment.as_token_stream("__pw_tokenizer_crate"))
            .collect::<Result<Vec<_>>>()?;

        // `token_backend` returns a `TokenStream2` which both inserts the
        // string into the token database and returns the hash value.
        let token = token_backend(self.domain, &format_string_pieces);

        if encoding_fragments.is_empty() {
            Ok(quote! {
              {
                __pw_tokenizer_crate::internal::tokenize_to_buffer_no_args(#buffer, #token)
              }
            })
        } else {
            Ok(quote! {
              {
                use __pw_tokenizer_crate::internal::Argument;
                __pw_tokenizer_crate::internal::tokenize_to_buffer(
                  #buffer,
                  #token,
                  &[#(#encoding_fragments),*]
                )
              }
            })
        }
    }

    fn string_fragment(&mut self, _string: &str) -> Result<()> {
        // String fragments are encoded directly into the format string.
        Ok(())
    }

    fn integer_conversion(&mut self, ty: Ident, expression: Arg) -> Result<Option<String>> {
        self.encoding_fragments.push(quote! {
          Argument::Varint(#ty::from(#expression) as i32)
        });

        Ok(None)
    }

    fn string_conversion(&mut self, expression: Arg) -> Result<Option<String>> {
        self.encoding_fragments.push(quote! {
          Argument::String(#expression)
        });
        Ok(None)
    }

    fn char_conversion(&mut self, expression: Arg) -> Result<Option<String>> {
        self.encoding_fragments.push(quote! {
          Argument::Char(u8::from(#expression))
        });
        Ok(None)
    }

    fn untyped_conversion(&mut self, expression: Arg) -> Result<()> {
        self.encoding_fragments.push(quote! {
          Argument::from(#expression)
        });
        Ok(())
    }
}

/// Generates code to marshal a tokenized core::fmt format string and arguments
/// into a buffer.  See [`pw_tokenizer::tokenize_core_fmt_to_buffer`] for details
/// on behavior.
///
/// Internally the [`AsMut<[u8]>`] buffer is wrapped in a [`pw_stream::Cursor`]
/// to fill the buffer incrementally.
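///
/// # Example
///
/// An illustrative sketch of the public wrapper (the exact argument forms and
/// return type are assumptions; see the `pw_tokenizer` crate docs):
///
/// ```ignore
/// use pw_tokenizer::tokenize_core_fmt_to_buffer;
///
/// let mut buffer = [0u8; 64];
/// // Encodes the token for "temp: {}" followed by the varint-encoded
/// // argument, returning the number of bytes used.
/// let len = tokenize_core_fmt_to_buffer!(&mut buffer, "temp: {}", 42 as i32)?;
/// ```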
#[proc_macro]
pub fn _tokenize_core_fmt_to_buffer(tokens: TokenStream) -> TokenStream {
    let input = parse_macro_input!(tokens as TokenizeToBufferArgs<CoreFmtFormatStringParser>);

    // Hard codes domain to "".
    let generator = TokenizeToBufferGenerator::new("", &input.buffer);

    match generate_printf(generator, input.format_and_args.into()) {
        Ok(token_stream) => token_stream.into(),
        Err(e) => e.to_compile_error().into(),
    }
}

/// Generates code to marshal a tokenized printf format string and arguments
/// into a buffer.  See [`pw_tokenizer::tokenize_printf_to_buffer`] for details
/// on behavior.
///
/// Internally the [`AsMut<[u8]>`] buffer is wrapped in a [`pw_stream::Cursor`]
/// to fill the buffer incrementally.
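///
/// # Example
///
/// An illustrative sketch of the public wrapper (assumed usage; see the
/// `pw_tokenizer` crate docs):
///
/// ```ignore
/// use pw_tokenizer::tokenize_printf_to_buffer;
///
/// let mut buffer = [0u8; 64];
/// let len = tokenize_printf_to_buffer!(&mut buffer, "temp: %d", 42)?;
/// ```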
#[proc_macro]
pub fn _tokenize_printf_to_buffer(tokens: TokenStream) -> TokenStream {
    let input = parse_macro_input!(tokens as TokenizeToBufferArgs<PrintfFormatStringParser>);

    // Hard codes domain to "".
    let generator = TokenizeToBufferGenerator::new("", &input.buffer);

    match generate_printf(generator, input.format_and_args.into()) {
        Ok(token_stream) => token_stream.into(),
        Err(e) => e.to_compile_error().into(),
    }
}

// Args to tokenize to writer that are parsed according to the pattern:
//   ($ty:ty, $format_string:literal, $($args:expr),*)
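//
// e.g. (illustrative): `(MyWriter, "temp: %d", temp)` as passed through by
// the `tokenize_*_to_writer!` wrappers, where `MyWriter` is a hypothetical
// writer type.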
#[derive(Debug)]
struct TokenizeToWriterArgs<T: FormatStringParser> {
    ty: Type,
    format_and_args: FormatAndArgsFlavor<T>,
}

impl<T: FormatStringParser> Parse for TokenizeToWriterArgs<T> {
    fn parse(input: ParseStream) -> syn::parse::Result<Self> {
        let ty: Type = input.parse()?;
        input.parse::<Token![,]>()?;
        let format_and_args: FormatAndArgsFlavor<_> = input.parse()?;

        Ok(Self {
            ty,
            format_and_args,
        })
    }
}

// A PrintfFormatMacroGenerator that provides the code generation backend for
// the `tokenize_to_writer!` macro.
struct TokenizeToWriterGenerator<'a> {
    domain: &'a str,
    ty: &'a Type,
    encoding_fragments: Vec<TokenStream2>,
}

impl<'a> TokenizeToWriterGenerator<'a> {
    fn new(domain: &'a str, ty: &'a Type) -> Self {
        Self {
            domain,
            ty,
            encoding_fragments: Vec::new(),
        }
    }
}

impl<'a> PrintfFormatMacroGenerator for TokenizeToWriterGenerator<'a> {
    fn finalize(
        self,
        format_string_fragments: &[PrintfFormatStringFragment],
    ) -> Result<TokenStream2> {
        // Locally scoped aliases so we can refer to them in `quote!()`.
        let ty = self.ty;
        let encoding_fragments = self.encoding_fragments;

        let format_string_pieces: Vec<_> = format_string_fragments
            .iter()
            .map(|fragment| fragment.as_token_stream("__pw_tokenizer_crate"))
            .collect::<Result<Vec<_>>>()?;

        // `token_backend` returns a `TokenStream2` which both inserts the
        // string into the token database and returns the hash value.
        let token = token_backend(self.domain, &format_string_pieces);

        if encoding_fragments.is_empty() {
            Ok(quote! {
              {
                __pw_tokenizer_crate::internal::tokenize_to_writer_no_args::<#ty>(#token)
              }
            })
        } else {
            Ok(quote! {
              {
                use __pw_tokenizer_crate::internal::Argument;
                __pw_tokenizer_crate::internal::tokenize_to_writer::<#ty>(
                  #token,
                  &[#(#encoding_fragments),*]
                )
              }
            })
        }
    }

    fn string_fragment(&mut self, _string: &str) -> Result<()> {
        // String fragments are encoded directly into the format string.
        Ok(())
    }

    fn integer_conversion(&mut self, ty: Ident, expression: Arg) -> Result<Option<String>> {
        self.encoding_fragments.push(quote! {
          Argument::Varint(#ty::from(#expression) as i32)
        });

        Ok(None)
    }

    fn string_conversion(&mut self, expression: Arg) -> Result<Option<String>> {
        self.encoding_fragments.push(quote! {
          Argument::String(#expression)
        });
        Ok(None)
    }

    fn char_conversion(&mut self, expression: Arg) -> Result<Option<String>> {
        self.encoding_fragments.push(quote! {
          Argument::Char(u8::from(#expression))
        });
        Ok(None)
    }

    fn untyped_conversion(&mut self, expression: Arg) -> Result<()> {
        self.encoding_fragments.push(quote! {
          Argument::from(#expression)
        });
        Ok(())
    }
}

/// Generates code to marshal a tokenized core::fmt format string and arguments
/// into a [`pw_stream::Write`].  See [`pw_tokenizer::tokenize_core_fmt_to_writer`]
/// for details on behavior.
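///
/// # Example
///
/// An illustrative sketch of the public wrapper, following the
/// `($ty, $format_string, $args)` pattern parsed above; `MyWriter` is a
/// hypothetical type satisfying the writer bound:
///
/// ```ignore
/// use pw_tokenizer::tokenize_core_fmt_to_writer;
///
/// tokenize_core_fmt_to_writer!(MyWriter, "temp: {}", 42 as i32)?;
/// ```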
#[proc_macro]
pub fn _tokenize_core_fmt_to_writer(tokens: TokenStream) -> TokenStream {
    let input = parse_macro_input!(tokens as TokenizeToWriterArgs<CoreFmtFormatStringParser>);

    // Hard codes domain to "".
    let generator = TokenizeToWriterGenerator::new("", &input.ty);

    match generate_printf(generator, input.format_and_args.into()) {
        Ok(token_stream) => token_stream.into(),
        Err(e) => e.to_compile_error().into(),
    }
}

/// Generates code to marshal a tokenized printf format string and arguments
/// into a [`pw_stream::Write`].  See [`pw_tokenizer::tokenize_printf_to_writer`]
/// for details on behavior.
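///
/// # Example
///
/// An illustrative sketch of the public wrapper (assumed usage; `MyWriter`
/// is a hypothetical writer type):
///
/// ```ignore
/// use pw_tokenizer::tokenize_printf_to_writer;
///
/// tokenize_printf_to_writer!(MyWriter, "temp: %d", 42)?;
/// ```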
#[proc_macro]
pub fn _tokenize_printf_to_writer(tokens: TokenStream) -> TokenStream {
    let input = parse_macro_input!(tokens as TokenizeToWriterArgs<PrintfFormatStringParser>);

    // Hard codes domain to "".
    let generator = TokenizeToWriterGenerator::new("", &input.ty);

    match generate_printf(generator, input.format_and_args.into()) {
        Ok(token_stream) => token_stream.into(),
        Err(e) => e.to_compile_error().into(),
    }
}

// Macros tested in `pw_tokenizer` crate.
#[cfg(test)]
mod tests {}