xref: /aosp_15_r20/external/pigweed/pw_format/rust/pw_format/lib.rs (revision 61c4878ac05f98d0ceed94b57d316916de578985)
1 // Copyright 2023 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14 
15 //! The `pw_format` crate is a parser used to implement proc macros that:
16 //! * Understand format string argument types at compile time.
17 //! * Syntax check format strings.
18 //!
19 //! `pw_format` is written against `std` and is not intended to be
20 //! used in an embedded context.  Some efficiency and memory is traded for a
21 //! more expressive interface that exposes the format string's "syntax tree"
22 //! to the API client.
23 //!
24 //! # Proc Macros
25 //!
26 //! The [`macros`] module provides infrastructure for implementing proc macros
27 //! that take format strings as arguments.
28 //!
29 //! # Example
30 //!
31 //! ```
32 //! use pw_format::{
33 //!     Alignment, Argument, ConversionSpec, Flag, FormatFragment, FormatString,
34 //!     Length, MinFieldWidth, Precision, Primitive, Style,
35 //! };
36 //!
37 //! let format_string =
38 //!   FormatString::parse_printf("long double %+ 4.2Lf is %-03hd%%.").unwrap();
39 //!
40 //! assert_eq!(format_string, FormatString {
41 //!   fragments: vec![
42 //!       FormatFragment::Literal("long double ".to_string()),
43 //!       FormatFragment::Conversion(ConversionSpec {
44 //!           argument: Argument::None,
45 //!           fill: ' ',
46 //!           alignment: Alignment::None,
47 //!           flags: [Flag::ForceSign, Flag::SpaceSign].into_iter().collect(),
48 //!           min_field_width: MinFieldWidth::Fixed(4),
49 //!           precision: Precision::Fixed(2),
50 //!           length: Some(Length::LongDouble),
51 //!           primitive: Primitive::Float,
52 //!           style: Style::None,
53 //!       }),
54 //!       FormatFragment::Literal(" is ".to_string()),
55 //!       FormatFragment::Conversion(ConversionSpec {
56 //!           argument: Argument::None,
57 //!           fill: ' ',
58 //!           alignment: Alignment::Left,
59 //!           flags: [Flag::LeftJustify, Flag::LeadingZeros]
60 //!               .into_iter()
61 //!               .collect(),
62 //!           min_field_width: MinFieldWidth::Fixed(3),
63 //!           precision: Precision::None,
64 //!           length: Some(Length::Short),
65 //!           primitive: Primitive::Integer,
66 //!           style: Style::None,
67 //!       }),
68 //!       FormatFragment::Literal("%.".to_string()),
69 //!   ]
70 //! });
71 //! ```
72 #![deny(missing_docs)]
73 //#![feature(type_alias_impl_trait)]
74 
75 use std::collections::HashSet;
76 
77 use nom::{
78     branch::alt,
79     bytes::complete::tag,
80     character::complete::digit1,
81     combinator::{map, map_res},
82     IResult,
83 };
84 use quote::{quote, ToTokens};
85 
86 pub mod macros;
87 
88 mod core_fmt;
89 mod printf;
90 
/// The kind of value a conversion consumes (integer, float, string, etc.).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Primitive {
    /// A signed integer value.
    Integer,

    /// An unsigned integer value.
    Unsigned,

    /// A floating point value.
    Float,

    /// A string value.
    String,

    /// A single character value.
    Character,

    /// A pointer value.
    Pointer,

    /// A value with no declared type.
    Untyped,
}
115 
/// The abstract rendering style requested by a conversion.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Style {
    /// No explicit style; the default rendering is used.
    None,

    /// Octal rendering (i.e. "%o" or "{:o}").
    Octal,

    /// Lower case hexadecimal rendering (i.e. "%x" or "{:x}").
    Hex,

    /// Upper case hexadecimal rendering (i.e. "%X" or "{:X}").
    UpperHex,

    /// Lower case exponential rendering (i.e. "%e" or "{:e}").
    Exponential,

    /// Upper case exponential rendering (i.e. "%E" or "{:E}").
    UpperExponential,

    /// Pointer rendering (i.e. "%p" or "{:p}").
    Pointer,

    /// `core::fmt`'s `{:?}`
    Debug,

    /// `core::fmt`'s `{:x?}`
    HexDebug,

    /// `core::fmt`'s `{:X?}`
    UpperHexDebug,

    /// Binary rendering, which is unsupported.
    ///
    /// The variant is kept so that the proc macros can report a useful
    /// error message when it is requested.
    Binary,
}
155 
156 /// Implemented for testing through the pw_format_test_macros crate.
157 impl ToTokens for Style {
to_tokens(&self, tokens: &mut proc_macro2::TokenStream)158     fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
159         let new_tokens = match self {
160             Style::None => quote!(pw_format::Style::None),
161             Style::Octal => quote!(pw_format::Style::Octal),
162             Style::Hex => quote!(pw_format::Style::Hex),
163             Style::UpperHex => quote!(pw_format::Style::UpperHex),
164             Style::Exponential => quote!(pw_format::Style::Exponential),
165             Style::UpperExponential => quote!(pw_format::Style::UpperExponential),
166             Style::Debug => quote!(pw_format::Style::Debug),
167             Style::HexDebug => quote!(pw_format::Style::HexDebug),
168             Style::UpperHexDebug => quote!(pw_format::Style::UpperHexDebug),
169             Style::Pointer => quote!(pw_format::Style::Pointer),
170             Style::Binary => quote!(pw_format::Style::Binary),
171         };
172         new_tokens.to_tokens(tokens);
173     }
174 }
175 
/// A printf flag (the '+' in %+d).
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum Flag {
    /// The `-` flag.
    LeftJustify,

    /// The `+` flag.
    ForceSign,

    /// The ` ` (space) flag.
    SpaceSign,

    /// The `#` flag.
    AlternateSyntax,

    /// The `0` flag.
    LeadingZeros,
}
194 
/// A printf minimum field width (the 5 in %5d).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MinFieldWidth {
    /// The conversion did not specify a field width.
    None,

    /// A field width given literally in the format string.
    Fixed(u32),

    /// A field width supplied as an argument (i.e. %*d).
    Variable,
}
207 
/// A printf precision (the .5 in %.5d).
///
/// For string conversions (%s) the precision is treated as the maximum
/// number of bytes of the string to output.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Precision {
    /// The conversion did not specify a precision.
    None,

    /// A precision given literally in the format string.
    Fixed(u32),

    /// A precision supplied as an argument (i.e. %.*f).
    Variable,
}
223 
/// A printf length modifier (the l in %ld).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Length {
    /// The `hh` modifier.
    Char,

    /// The `h` modifier.
    Short,

    /// The `l` modifier.
    Long,

    /// The `ll` modifier.
    LongLong,

    /// The `L` modifier.
    LongDouble,

    /// The `j` modifier.
    IntMax,

    /// The `z` modifier.
    Size,

    /// The `t` modifier.
    PointerDiff,
}
251 
/// A core::fmt alignment spec.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Alignment {
    /// No alignment was specified.
    None,

    /// Left alignment (`<`).
    Left,

    /// Center alignment (`^`).
    Center,

    /// Right alignment (`>`).
    Right,
}
267 
/// An argument in a core::fmt style alignment spec.
///
/// i.e. the var_name in `{var_name:#0x}`
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Argument {
    /// No argument was given.
    None,

    /// An argument selected by position (i.e. `{0}`).
    Positional(usize),

    /// An argument selected by name (i.e. `{var_name}`).
    Named(String),
}
282 
283 #[derive(Clone, Debug, PartialEq, Eq)]
284 /// A printf conversion specification aka a % clause.
285 pub struct ConversionSpec {
286     /// ConversionSpec's argument.
287     pub argument: Argument,
288     /// ConversionSpec's fill character.
289     pub fill: char,
290     /// ConversionSpec's field alignment.
291     pub alignment: Alignment,
292     /// ConversionSpec's set of [Flag]s.
293     pub flags: HashSet<Flag>,
294     /// ConversionSpec's minimum field width argument.
295     pub min_field_width: MinFieldWidth,
296     /// ConversionSpec's [Precision] argument.
297     pub precision: Precision,
298     /// ConversionSpec's [Length] argument.
299     pub length: Option<Length>,
300     /// ConversionSpec's [Primitive].
301     pub primitive: Primitive,
302     /// ConversionSpec's [Style].
303     pub style: Style,
304 }
305 
306 #[derive(Clone, Debug, PartialEq, Eq)]
307 /// A fragment of a printf format string.
308 pub enum FormatFragment {
309     /// A literal string value.
310     Literal(String),
311 
312     /// A conversion specification (i.e. %d).
313     Conversion(ConversionSpec),
314 }
315 
316 impl FormatFragment {
317     /// Try to append `fragment` to `self`.
318     ///
319     /// Returns `None` if the appending succeeds and `Some<fragment>` if it fails.
try_append<'a>(&mut self, fragment: &'a FormatFragment) -> Option<&'a FormatFragment>320     fn try_append<'a>(&mut self, fragment: &'a FormatFragment) -> Option<&'a FormatFragment> {
321         let Self::Literal(literal_fragment) = &fragment else {
322             return Some(fragment);
323         };
324 
325         let Self::Literal(ref mut literal_self) = self else {
326             return Some(fragment);
327         };
328 
329         literal_self.push_str(literal_fragment);
330 
331         None
332     }
333 }
334 
335 #[derive(Debug, PartialEq, Eq)]
336 /// A parsed printf format string.
337 pub struct FormatString {
338     /// The [FormatFragment]s that comprise the [FormatString].
339     pub fragments: Vec<FormatFragment>,
340 }
341 
342 impl FormatString {
343     /// Parses a printf style format string.
parse_printf(s: &str) -> Result<Self, String>344     pub fn parse_printf(s: &str) -> Result<Self, String> {
345         // TODO: b/281858500 - Add better errors to failed parses.
346         let (rest, result) = printf::format_string(s)
347             .map_err(|e| format!("Failed to parse format string \"{s}\": {e}"))?;
348 
349         // If the parser did not consume all the input, return an error.
350         if !rest.is_empty() {
351             return Err(format!(
352                 "Failed to parse format string fragment: \"{rest}\""
353             ));
354         }
355 
356         Ok(result)
357     }
358 
359     /// Parses a core::fmt style format string.
parse_core_fmt(s: &str) -> Result<Self, String>360     pub fn parse_core_fmt(s: &str) -> Result<Self, String> {
361         // TODO: b/281858500 - Add better errors to failed parses.
362         let (rest, result) = core_fmt::format_string(s)
363             .map_err(|e| format!("Failed to parse format string \"{s}\": {e}"))?;
364 
365         // If the parser did not consume all the input, return an error.
366         if !rest.is_empty() {
367             return Err(format!("Failed to parse format string: \"{rest}\""));
368         }
369 
370         Ok(result)
371     }
372 
373     /// Creates a `FormatString` from a slice of fragments.
374     ///
375     /// This primary responsibility of this function is to merge literal
376     /// fragments.  Adjacent literal fragments occur when a parser parses
377     /// escape sequences.  Merging them here allows a
378     /// [`macros::FormatMacroGenerator`] to not worry about the escape codes.
from_fragments(fragments: &[FormatFragment]) -> Self379     pub(crate) fn from_fragments(fragments: &[FormatFragment]) -> Self {
380         Self {
381             fragments: fragments
382                 .iter()
383                 .fold(Vec::<_>::new(), |mut fragments, fragment| {
384                     // Collapse adjacent literal fragments.
385                     let Some(last) = fragments.last_mut() else {
386                         // If there are no accumulated fragments, add this one and return.
387                         fragments.push((*fragment).clone());
388                         return fragments;
389                     };
390                     if let Some(fragment) = last.try_append(fragment) {
391                         // If the fragments were able to append, no more work to do
392                         fragments.push((*fragment).clone());
393                     };
394                     fragments
395                 }),
396         }
397     }
398 }
399 
variable_width(input: &str) -> IResult<&str, MinFieldWidth>400 fn variable_width(input: &str) -> IResult<&str, MinFieldWidth> {
401     map(tag("*"), |_| MinFieldWidth::Variable)(input)
402 }
403 
fixed_width(input: &str) -> IResult<&str, MinFieldWidth>404 fn fixed_width(input: &str) -> IResult<&str, MinFieldWidth> {
405     map_res(
406         digit1,
407         |value: &str| -> Result<MinFieldWidth, std::num::ParseIntError> {
408             Ok(MinFieldWidth::Fixed(value.parse()?))
409         },
410     )(input)
411 }
412 
no_width(input: &str) -> IResult<&str, MinFieldWidth>413 fn no_width(input: &str) -> IResult<&str, MinFieldWidth> {
414     Ok((input, MinFieldWidth::None))
415 }
416 
width(input: &str) -> IResult<&str, MinFieldWidth>417 fn width(input: &str) -> IResult<&str, MinFieldWidth> {
418     alt((variable_width, fixed_width, no_width))(input)
419 }
420 
variable_precision(input: &str) -> IResult<&str, Precision>421 fn variable_precision(input: &str) -> IResult<&str, Precision> {
422     let (input, _) = tag(".")(input)?;
423     map(tag("*"), |_| Precision::Variable)(input)
424 }
425 
fixed_precision(input: &str) -> IResult<&str, Precision>426 fn fixed_precision(input: &str) -> IResult<&str, Precision> {
427     let (input, _) = tag(".")(input)?;
428     map_res(
429         digit1,
430         |value: &str| -> Result<Precision, std::num::ParseIntError> {
431             Ok(Precision::Fixed(value.parse()?))
432         },
433     )(input)
434 }
435 
no_precision(input: &str) -> IResult<&str, Precision>436 fn no_precision(input: &str) -> IResult<&str, Precision> {
437     Ok((input, Precision::None))
438 }
439 
precision(input: &str) -> IResult<&str, Precision>440 fn precision(input: &str) -> IResult<&str, Precision> {
441     alt((variable_precision, fixed_precision, no_precision))(input)
442 }
443 
444 #[cfg(test)]
445 mod tests;
446