// Copyright 2023 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

// This proc macro crate is a private API for the `pw_tokenizer` crate.
#![doc(hidden)]

use std::ffi::CString;

use proc_macro::TokenStream;
use proc_macro2::Ident;
use quote::{format_ident, quote, ToTokens};
use syn::{
    parse::{Parse, ParseStream},
    parse_macro_input, Expr, LitStr, Token, Type,
};

use pw_format::macros::{
    generate_printf, Arg, CoreFmtFormatStringParser, FormatAndArgsFlavor, FormatStringParser,
    PrintfFormatMacroGenerator, PrintfFormatStringFragment, PrintfFormatStringParser, Result,
};
use pw_tokenizer_core::TOKENIZER_ENTRY_MAGIC;

type TokenStream2 = proc_macro2::TokenStream;

// Handles tokenizing (hashing) `fragments` and adding them to the token
// database under the specified `domain`. A detailed description of what's
// happening can be found in the docs for the [`pw_tokenizer::token`] macro.
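//
// A minimal sketch of the expansion for a single fragment `"foo"` with an
// empty domain (illustrative only; the authoritative expansion is the
// `quote!` block below):
//
//     {
//         const STRING: &str = "foo";
//         const HASH: u32 = __pw_tokenizer_crate::hash_string(STRING);
//         // ... a `TokenEntry` static placed in the token database section ...
//         HASH
//     }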
fn token_backend(domain: &str, fragments: &[TokenStream2]) -> TokenStream2 {
    let ident = format_ident!("_PW_TOKENIZER_STRING_ENTRY_RUST");

    // pw_tokenizer is intended for use with ELF files only. Mach-O files (macOS
    // executables) do not support section names longer than 16 characters, so a
    // short, unused section name is used on macOS.
    let section = if cfg!(target_os = "macos") {
        ",pw,".to_string()
    } else {
        ".pw_tokenizer.entries.rust".to_string()
    };

    let domain = CString::new(domain).unwrap();
    let domain_bytes = domain.as_bytes_with_nul();
    let domain_bytes_len = domain_bytes.len();

    quote! {
        // Use an inner scope to avoid identifier collision. Name mangling
        // will disambiguate these in the symbol table.
        {
            const STRING: &str = __pw_tokenizer_crate::concat_static_strs!(#(#fragments),*);
            const STRING_BYTES: &[u8] = STRING.as_bytes();
            const STRING_LEN: usize = STRING_BYTES.len();

            const HASH: u32 = __pw_tokenizer_crate::hash_string(STRING);

            #[repr(C, packed(1))]
            struct TokenEntry {
                magic: u32,
                token: u32,
                domain_size: u32,
                string_length: u32,
                domain: [u8; #domain_bytes_len],
                string: [u8; STRING_LEN],
                null_terminator: u8,
            };
            // This is currently manually verified to be correct.
            // TODO: b/287132907 - Add integration tests for token database.
            #[link_section = #section ]
            #[used]
            static #ident: TokenEntry = TokenEntry {
                magic: #TOKENIZER_ENTRY_MAGIC,
                token: HASH,
                domain_size: #domain_bytes_len as u32,
                string_length: (STRING_LEN + 1) as u32,
                domain: [ #(#domain_bytes),* ],
                // Safety: `STRING_LEN` is declared as the length of `STRING_BYTES` above.
                string: unsafe { *::core::mem::transmute::<_, *const [u8; STRING_LEN]>(STRING_BYTES.as_ptr()) },
                null_terminator: 0u8,
            };

            HASH
        }
    }
}

// Documented in `pw_tokenizer::token`.
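//
// A hypothetical invocation through the public wrapper (illustrative only):
//
//     let token: u32 = pw_tokenizer::token!("hello, world");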
#[proc_macro]
pub fn _token(tokens: TokenStream) -> TokenStream {
    let input = parse_macro_input!(tokens as LitStr);
    token_backend("", &[input.into_token_stream()]).into()
}

// Args to tokenize to buffer that are parsed according to the pattern:
// ($buffer:expr, $format_string:literal, $($args:expr),*)
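//
// For example, a hypothetical invocation (illustrative only):
//
//     tokenize_core_fmt_to_buffer!(&mut buf, "temp: {}", temp as u32)
//
// parses `&mut buf` into `buffer` and the rest into `format_and_args`.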
#[derive(Debug)]
struct TokenizeToBufferArgs<T: FormatStringParser + core::fmt::Debug> {
    buffer: Expr,
    format_and_args: FormatAndArgsFlavor<T>,
}

impl<T: FormatStringParser + core::fmt::Debug> Parse for TokenizeToBufferArgs<T> {
    fn parse(input: ParseStream) -> syn::parse::Result<Self> {
        let buffer: Expr = input.parse()?;
        input.parse::<Token![,]>()?;
        let format_and_args: FormatAndArgsFlavor<_> = input.parse()?;

        Ok(TokenizeToBufferArgs {
            buffer,
            format_and_args,
        })
    }
}

// A PrintfFormatMacroGenerator that provides the code generation backend for
// the `tokenize_to_buffer!` macro.
struct TokenizeToBufferGenerator<'a> {
    domain: &'a str,
    buffer: &'a Expr,
    encoding_fragments: Vec<TokenStream2>,
}

impl<'a> TokenizeToBufferGenerator<'a> {
    fn new(domain: &'a str, buffer: &'a Expr) -> Self {
        Self {
            domain,
            buffer,
            encoding_fragments: Vec::new(),
        }
    }
}

impl<'a> PrintfFormatMacroGenerator for TokenizeToBufferGenerator<'a> {
    fn finalize(
        self,
        format_string_fragments: &[PrintfFormatStringFragment],
    ) -> Result<TokenStream2> {
        // Locally scoped aliases so we can refer to them in `quote!()`.
        let buffer = self.buffer;
        let encoding_fragments = self.encoding_fragments;

        let format_string_pieces: Vec<_> = format_string_fragments
            .iter()
            .map(|fragment| fragment.as_token_stream("__pw_tokenizer_crate"))
            .collect::<Result<Vec<_>>>()?;

        // `token_backend` returns a `TokenStream2` which both inserts the
        // string into the token database and returns the hash value.
        let token = token_backend(self.domain, &format_string_pieces);

        if encoding_fragments.is_empty() {
            Ok(quote! {
                {
                    __pw_tokenizer_crate::internal::tokenize_to_buffer_no_args(#buffer, #token)
                }
            })
        } else {
            Ok(quote! {
                {
                    use __pw_tokenizer_crate::internal::Argument;
                    __pw_tokenizer_crate::internal::tokenize_to_buffer(
                        #buffer,
                        #token,
                        &[#(#encoding_fragments),*]
                    )
                }
            })
        }
    }

    fn string_fragment(&mut self, _string: &str) -> Result<()> {
        // String fragments are encoded directly into the format string.
        Ok(())
    }

    fn integer_conversion(&mut self, ty: Ident, expression: Arg) -> Result<Option<String>> {
        self.encoding_fragments.push(quote! {
            Argument::Varint(#ty::from(#expression) as i32)
        });

        Ok(None)
    }

    fn string_conversion(&mut self, expression: Arg) -> Result<Option<String>> {
        self.encoding_fragments.push(quote! {
            Argument::String(#expression)
        });
        Ok(None)
    }

    fn char_conversion(&mut self, expression: Arg) -> Result<Option<String>> {
        self.encoding_fragments.push(quote! {
            Argument::Char(u8::from(#expression))
        });
        Ok(None)
    }

    fn untyped_conversion(&mut self, expression: Arg) -> Result<()> {
        self.encoding_fragments.push(quote! {
            Argument::from(#expression)
        });
        Ok(())
    }
}

/// Generates code to marshal a tokenized core::fmt format string and arguments
/// into a buffer. See [`pw_tokenizer::tokenize_core_fmt_to_buffer`] for details
/// on behavior.
///
/// Internally the `AsMut<[u8]>` buffer is wrapped in a [`pw_stream::Cursor`]
/// to fill the buffer incrementally.
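///
/// A minimal sketch of intended use through the public wrapper, assuming the
/// `pw_tokenizer::tokenize_core_fmt_to_buffer!` re-export (illustrative only):
///
/// ```ignore
/// use pw_tokenizer::tokenize_core_fmt_to_buffer;
///
/// let mut buffer = [0u8; 1024];
/// // On success, `len` is the number of bytes of encoded token plus arguments.
/// let len = tokenize_core_fmt_to_buffer!(&mut buffer, "The answer is {}!", 42 as i32)?;
/// ```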
#[proc_macro]
pub fn _tokenize_core_fmt_to_buffer(tokens: TokenStream) -> TokenStream {
    let input = parse_macro_input!(tokens as TokenizeToBufferArgs<CoreFmtFormatStringParser>);

    // Hard codes domain to "".
    let generator = TokenizeToBufferGenerator::new("", &input.buffer);

    match generate_printf(generator, input.format_and_args.into()) {
        Ok(token_stream) => token_stream.into(),
        Err(e) => e.to_compile_error().into(),
    }
}

/// Generates code to marshal a tokenized printf format string and arguments
/// into a buffer. See [`pw_tokenizer::tokenize_printf_to_buffer`] for details
/// on behavior.
///
/// Internally the `AsMut<[u8]>` buffer is wrapped in a [`pw_stream::Cursor`]
/// to fill the buffer incrementally.
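///
/// A minimal sketch of intended use through the public wrapper, assuming the
/// `pw_tokenizer::tokenize_printf_to_buffer!` re-export (illustrative only):
///
/// ```ignore
/// use pw_tokenizer::tokenize_printf_to_buffer;
///
/// let mut buffer = [0u8; 1024];
/// // Same behavior as the core::fmt variant, but with a printf format string.
/// let len = tokenize_printf_to_buffer!(&mut buffer, "The answer is %d!", 42)?;
/// ```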
#[proc_macro]
pub fn _tokenize_printf_to_buffer(tokens: TokenStream) -> TokenStream {
    let input = parse_macro_input!(tokens as TokenizeToBufferArgs<PrintfFormatStringParser>);

    // Hard codes domain to "".
    let generator = TokenizeToBufferGenerator::new("", &input.buffer);

    match generate_printf(generator, input.format_and_args.into()) {
        Ok(token_stream) => token_stream.into(),
        Err(e) => e.to_compile_error().into(),
    }
}

// Args to tokenize to writer that are parsed according to the pattern:
// ($ty:ty, $format_string:literal, $($args:expr),*)
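//
// For example, a hypothetical invocation (illustrative only):
//
//     tokenize_core_fmt_to_writer!(MyWriter, "temp: {}", temp as u32)
//
// parses `MyWriter` into `ty` and the rest into `format_and_args`.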
#[derive(Debug)]
struct TokenizeToWriterArgs<T: FormatStringParser> {
    ty: Type,
    format_and_args: FormatAndArgsFlavor<T>,
}

impl<T: FormatStringParser> Parse for TokenizeToWriterArgs<T> {
    fn parse(input: ParseStream) -> syn::parse::Result<Self> {
        let ty: Type = input.parse()?;
        input.parse::<Token![,]>()?;
        let format_and_args: FormatAndArgsFlavor<_> = input.parse()?;

        Ok(Self {
            ty,
            format_and_args,
        })
    }
}

// A PrintfFormatMacroGenerator that provides the code generation backend for
// the `tokenize_to_writer!` macro.
struct TokenizeToWriterGenerator<'a> {
    domain: &'a str,
    ty: &'a Type,
    encoding_fragments: Vec<TokenStream2>,
}

impl<'a> TokenizeToWriterGenerator<'a> {
    fn new(domain: &'a str, ty: &'a Type) -> Self {
        Self {
            domain,
            ty,
            encoding_fragments: Vec::new(),
        }
    }
}

impl<'a> PrintfFormatMacroGenerator for TokenizeToWriterGenerator<'a> {
    fn finalize(
        self,
        format_string_fragments: &[PrintfFormatStringFragment],
    ) -> Result<TokenStream2> {
        // Locally scoped aliases so we can refer to them in `quote!()`.
        let ty = self.ty;
        let encoding_fragments = self.encoding_fragments;

        let format_string_pieces: Vec<_> = format_string_fragments
            .iter()
            .map(|fragment| fragment.as_token_stream("__pw_tokenizer_crate"))
            .collect::<Result<Vec<_>>>()?;

        // `token_backend` returns a `TokenStream2` which both inserts the
        // string into the token database and returns the hash value.
        let token = token_backend(self.domain, &format_string_pieces);

        if encoding_fragments.is_empty() {
            Ok(quote! {
                {
                    __pw_tokenizer_crate::internal::tokenize_to_writer_no_args::<#ty>(#token)
                }
            })
        } else {
            Ok(quote! {
                {
                    use __pw_tokenizer_crate::internal::Argument;
                    __pw_tokenizer_crate::internal::tokenize_to_writer::<#ty>(
                        #token,
                        &[#(#encoding_fragments),*]
                    )
                }
            })
        }
    }

    fn string_fragment(&mut self, _string: &str) -> Result<()> {
        // String fragments are encoded directly into the format string.
        Ok(())
    }

    fn integer_conversion(&mut self, ty: Ident, expression: Arg) -> Result<Option<String>> {
        self.encoding_fragments.push(quote! {
            Argument::Varint(#ty::from(#expression) as i32)
        });

        Ok(None)
    }

    fn string_conversion(&mut self, expression: Arg) -> Result<Option<String>> {
        self.encoding_fragments.push(quote! {
            Argument::String(#expression)
        });
        Ok(None)
    }

    fn char_conversion(&mut self, expression: Arg) -> Result<Option<String>> {
        self.encoding_fragments.push(quote! {
            Argument::Char(u8::from(#expression))
        });
        Ok(None)
    }

    fn untyped_conversion(&mut self, expression: Arg) -> Result<()> {
        self.encoding_fragments.push(quote! {
            Argument::from(#expression)
        });
        Ok(())
    }
}

/// Generates code to marshal a tokenized core::fmt format string and arguments
/// into a [`pw_stream::Write`]. See [`pw_tokenizer::tokenize_core_fmt_to_writer`]
/// for details on behavior.
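///
/// A minimal sketch of intended use through the public wrapper, assuming the
/// `pw_tokenizer::tokenize_core_fmt_to_writer!` re-export and a hypothetical
/// `MyWriter` type implementing the expected writer interface (illustrative
/// only):
///
/// ```ignore
/// use pw_tokenizer::tokenize_core_fmt_to_writer;
///
/// tokenize_core_fmt_to_writer!(MyWriter, "The answer is {}!", 42 as i32)?;
/// ```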
#[proc_macro]
pub fn _tokenize_core_fmt_to_writer(tokens: TokenStream) -> TokenStream {
    let input = parse_macro_input!(tokens as TokenizeToWriterArgs<CoreFmtFormatStringParser>);

    // Hard codes domain to "".
    let generator = TokenizeToWriterGenerator::new("", &input.ty);

    match generate_printf(generator, input.format_and_args.into()) {
        Ok(token_stream) => token_stream.into(),
        Err(e) => e.to_compile_error().into(),
    }
}

/// Generates code to marshal a tokenized printf format string and arguments
/// into a [`pw_stream::Write`]. See [`pw_tokenizer::tokenize_printf_to_writer`]
/// for details on behavior.
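///
/// A minimal sketch of intended use through the public wrapper, assuming the
/// `pw_tokenizer::tokenize_printf_to_writer!` re-export and a hypothetical
/// `MyWriter` type implementing the expected writer interface (illustrative
/// only):
///
/// ```ignore
/// use pw_tokenizer::tokenize_printf_to_writer;
///
/// tokenize_printf_to_writer!(MyWriter, "The answer is %d!", 42)?;
/// ```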
#[proc_macro]
pub fn _tokenize_printf_to_writer(tokens: TokenStream) -> TokenStream {
    let input = parse_macro_input!(tokens as TokenizeToWriterArgs<PrintfFormatStringParser>);

    // Hard codes domain to "".
    let generator = TokenizeToWriterGenerator::new("", &input.ty);

    match generate_printf(generator, input.format_and_args.into()) {
        Ok(token_stream) => token_stream.into(),
        Err(e) => e.to_compile_error().into(),
    }
}

// Macros tested in `pw_tokenizer` crate.
#[cfg(test)]
mod tests {}