1 // Copyright 2023 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14
15 //! The `pw_format` crate is a parser used to implement proc macros that:
16 //! * Understand format string argument types at compile time.
17 //! * Syntax check format strings.
18 //!
//! `pw_format` is written against `std` and is not intended to be
//! used in an embedded context. Some efficiency and memory usage are traded
//! for a more expressive interface that exposes the format string's
//! "syntax tree" to the API client.
23 //!
24 //! # Proc Macros
25 //!
26 //! The [`macros`] module provides infrastructure for implementing proc macros
27 //! that take format strings as arguments.
28 //!
29 //! # Example
30 //!
31 //! ```
32 //! use pw_format::{
33 //! Alignment, Argument, ConversionSpec, Flag, FormatFragment, FormatString,
34 //! Length, MinFieldWidth, Precision, Primitive, Style,
35 //! };
36 //!
37 //! let format_string =
38 //! FormatString::parse_printf("long double %+ 4.2Lf is %-03hd%%.").unwrap();
39 //!
40 //! assert_eq!(format_string, FormatString {
41 //! fragments: vec![
42 //! FormatFragment::Literal("long double ".to_string()),
43 //! FormatFragment::Conversion(ConversionSpec {
44 //! argument: Argument::None,
45 //! fill: ' ',
46 //! alignment: Alignment::None,
47 //! flags: [Flag::ForceSign, Flag::SpaceSign].into_iter().collect(),
48 //! min_field_width: MinFieldWidth::Fixed(4),
49 //! precision: Precision::Fixed(2),
50 //! length: Some(Length::LongDouble),
51 //! primitive: Primitive::Float,
52 //! style: Style::None,
53 //! }),
54 //! FormatFragment::Literal(" is ".to_string()),
55 //! FormatFragment::Conversion(ConversionSpec {
56 //! argument: Argument::None,
57 //! fill: ' ',
58 //! alignment: Alignment::Left,
59 //! flags: [Flag::LeftJustify, Flag::LeadingZeros]
60 //! .into_iter()
61 //! .collect(),
62 //! min_field_width: MinFieldWidth::Fixed(3),
63 //! precision: Precision::None,
64 //! length: Some(Length::Short),
65 //! primitive: Primitive::Integer,
66 //! style: Style::None,
67 //! }),
68 //! FormatFragment::Literal("%.".to_string()),
69 //! ]
70 //! });
71 //! ```
72 #![deny(missing_docs)]
73 //#![feature(type_alias_impl_trait)]
74
75 use std::collections::HashSet;
76
77 use nom::{
78 branch::alt,
79 bytes::complete::tag,
80 character::complete::digit1,
81 combinator::{map, map_res},
82 IResult,
83 };
84 use quote::{quote, ToTokens};
85
86 pub mod macros;
87
88 mod core_fmt;
89 mod printf;
90
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
/// Primitive type of a conversion (integer, float, string, etc.)
///
/// This is the value stored in [`ConversionSpec::primitive`]. How the value
/// is rendered is described separately by [`Style`].
pub enum Primitive {
    /// Signed integer primitive.
    Integer,

    /// Unsigned integer primitive.
    Unsigned,

    /// Floating point primitive.
    Float,

    /// String primitive.
    String,

    /// Character primitive.
    Character,

    /// Pointer primitive.
    Pointer,

    /// Untyped primitive.
    Untyped,
}
115
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
/// The abstract formatting style for a conversion.
///
/// This is the value stored in [`ConversionSpec::style`]. Where a variant has
/// both a printf and a `core::fmt` spelling, both are listed.
pub enum Style {
    /// No style specified, use defaults.
    None,

    /// Octal rendering (i.e. "%o" or "{:o}").
    Octal,

    /// Hexadecimal rendering (i.e. "%x" or "{:x}").
    Hex,

    /// Upper case hexadecimal rendering (i.e. "%X" or "{:X}").
    UpperHex,

    /// Exponential rendering (i.e. "%e" or "{:e}").
    Exponential,

    /// Upper case exponential rendering (i.e. "%E" or "{:E}").
    UpperExponential,

    /// Pointer type rendering (i.e. "%p" or "{:p}").
    Pointer,

    /// `core::fmt`'s `{:?}`
    Debug,

    /// `core::fmt`'s `{:x?}`
    HexDebug,

    /// `core::fmt`'s `{:X?}`
    UpperHexDebug,

    /// Unsupported binary rendering
    ///
    /// This variant exists so that the proc macros can give useful error
    /// messages.
    Binary,
}
155
156 /// Implemented for testing through the pw_format_test_macros crate.
157 impl ToTokens for Style {
to_tokens(&self, tokens: &mut proc_macro2::TokenStream)158 fn to_tokens(&self, tokens: &mut proc_macro2::TokenStream) {
159 let new_tokens = match self {
160 Style::None => quote!(pw_format::Style::None),
161 Style::Octal => quote!(pw_format::Style::Octal),
162 Style::Hex => quote!(pw_format::Style::Hex),
163 Style::UpperHex => quote!(pw_format::Style::UpperHex),
164 Style::Exponential => quote!(pw_format::Style::Exponential),
165 Style::UpperExponential => quote!(pw_format::Style::UpperExponential),
166 Style::Debug => quote!(pw_format::Style::Debug),
167 Style::HexDebug => quote!(pw_format::Style::HexDebug),
168 Style::UpperHexDebug => quote!(pw_format::Style::UpperHexDebug),
169 Style::Pointer => quote!(pw_format::Style::Pointer),
170 Style::Binary => quote!(pw_format::Style::Binary),
171 };
172 new_tokens.to_tokens(tokens);
173 }
174 }
175
#[derive(Clone, Debug, Hash, PartialEq, Eq)]
/// A printf flag (the '+' in %+d).
///
/// Flags are collected into the `HashSet` held by [`ConversionSpec::flags`],
/// which is why this type derives `Hash` and `Eq`.
pub enum Flag {
    /// `-`
    LeftJustify,

    /// `+`
    ForceSign,

    /// ` `
    SpaceSign,

    /// `#`
    AlternateSyntax,

    /// `0`
    LeadingZeros,
}
194
#[derive(Clone, Debug, PartialEq, Eq)]
/// A printf minimum field width (the 5 in %5d).
///
/// This is the value stored in [`ConversionSpec::min_field_width`].
pub enum MinFieldWidth {
    /// No field width specified.
    None,

    /// Fixed field width, given as a decimal number in the format string.
    Fixed(u32),

    /// Variable field width passed as an argument (i.e. %*d).
    Variable,
}
207
#[derive(Clone, Debug, PartialEq, Eq)]
/// A printf precision (the .5 in %.5d).
///
/// For string conversions (%s) this is treated as the maximum number of
/// bytes of the string to output.
///
/// This is the value stored in [`ConversionSpec::precision`].
pub enum Precision {
    /// No precision specified.
    None,

    /// Fixed precision, given as a decimal number after the `.`.
    Fixed(u32),

    /// Variable precision passed as an argument (i.e. %.*f).
    Variable,
}
223
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
/// A printf length (the l in %ld).
///
/// Each variant is documented with the length modifier text it represents.
/// [`ConversionSpec::length`] holds `None` when no length is present.
pub enum Length {
    /// `hh`
    Char,

    /// `h`
    Short,

    /// `l`
    Long,

    /// `ll`
    LongLong,

    /// `L`
    LongDouble,

    /// `j`
    IntMax,

    /// `z`
    Size,

    /// `t`
    PointerDiff,
}
251
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
/// A core::fmt alignment spec.
///
/// This is the value stored in [`ConversionSpec::alignment`].
pub enum Alignment {
    /// No alignment
    None,

    /// Left alignment (`<`)
    Left,

    /// Center alignment (`^`)
    Center,

    /// Right alignment (`>`)
    Right,
}
267
#[derive(Clone, Debug, PartialEq, Eq)]
/// An argument in a core::fmt style alignment spec.
///
/// i.e. the var_name in `{var_name:#0x}`
///
/// This is the value stored in [`ConversionSpec::argument`].
pub enum Argument {
    /// No argument
    None,

    /// A positional argument (i.e. `{0}`).
    Positional(usize),

    /// A named argument (i.e. `{var_name}`).
    Named(String),
}
282
#[derive(Clone, Debug, PartialEq, Eq)]
/// A printf conversion specification aka a % clause.
///
/// A `ConversionSpec` is the payload of [`FormatFragment::Conversion`]. The
/// same structure also carries the `core::fmt`-oriented fields (`argument`,
/// `fill` and `alignment`).
pub struct ConversionSpec {
    /// ConversionSpec's argument.
    pub argument: Argument,
    /// ConversionSpec's fill character.
    pub fill: char,
    /// ConversionSpec's field alignment.
    pub alignment: Alignment,
    /// ConversionSpec's set of [Flag]s.
    pub flags: HashSet<Flag>,
    /// ConversionSpec's minimum field width argument.
    pub min_field_width: MinFieldWidth,
    /// ConversionSpec's [Precision] argument.
    pub precision: Precision,
    /// ConversionSpec's [Length] argument, or `None` if no length was given.
    pub length: Option<Length>,
    /// ConversionSpec's [Primitive].
    pub primitive: Primitive,
    /// ConversionSpec's [Style].
    pub style: Style,
}
305
#[derive(Clone, Debug, PartialEq, Eq)]
/// A fragment of a printf format string.
///
/// A [`FormatString`] is an ordered list of these fragments.
pub enum FormatFragment {
    /// A literal string value.
    Literal(String),

    /// A conversion specification (i.e. %d).
    Conversion(ConversionSpec),
}
315
316 impl FormatFragment {
317 /// Try to append `fragment` to `self`.
318 ///
319 /// Returns `None` if the appending succeeds and `Some<fragment>` if it fails.
try_append<'a>(&mut self, fragment: &'a FormatFragment) -> Option<&'a FormatFragment>320 fn try_append<'a>(&mut self, fragment: &'a FormatFragment) -> Option<&'a FormatFragment> {
321 let Self::Literal(literal_fragment) = &fragment else {
322 return Some(fragment);
323 };
324
325 let Self::Literal(ref mut literal_self) = self else {
326 return Some(fragment);
327 };
328
329 literal_self.push_str(literal_fragment);
330
331 None
332 }
333 }
334
#[derive(Debug, PartialEq, Eq)]
/// A parsed printf format string.
///
/// Instances are produced by [`FormatString::parse_printf`] and
/// [`FormatString::parse_core_fmt`].
pub struct FormatString {
    /// The [FormatFragment]s that comprise the [FormatString].
    pub fragments: Vec<FormatFragment>,
}
341
342 impl FormatString {
343 /// Parses a printf style format string.
parse_printf(s: &str) -> Result<Self, String>344 pub fn parse_printf(s: &str) -> Result<Self, String> {
345 // TODO: b/281858500 - Add better errors to failed parses.
346 let (rest, result) = printf::format_string(s)
347 .map_err(|e| format!("Failed to parse format string \"{s}\": {e}"))?;
348
349 // If the parser did not consume all the input, return an error.
350 if !rest.is_empty() {
351 return Err(format!(
352 "Failed to parse format string fragment: \"{rest}\""
353 ));
354 }
355
356 Ok(result)
357 }
358
359 /// Parses a core::fmt style format string.
parse_core_fmt(s: &str) -> Result<Self, String>360 pub fn parse_core_fmt(s: &str) -> Result<Self, String> {
361 // TODO: b/281858500 - Add better errors to failed parses.
362 let (rest, result) = core_fmt::format_string(s)
363 .map_err(|e| format!("Failed to parse format string \"{s}\": {e}"))?;
364
365 // If the parser did not consume all the input, return an error.
366 if !rest.is_empty() {
367 return Err(format!("Failed to parse format string: \"{rest}\""));
368 }
369
370 Ok(result)
371 }
372
373 /// Creates a `FormatString` from a slice of fragments.
374 ///
375 /// This primary responsibility of this function is to merge literal
376 /// fragments. Adjacent literal fragments occur when a parser parses
377 /// escape sequences. Merging them here allows a
378 /// [`macros::FormatMacroGenerator`] to not worry about the escape codes.
from_fragments(fragments: &[FormatFragment]) -> Self379 pub(crate) fn from_fragments(fragments: &[FormatFragment]) -> Self {
380 Self {
381 fragments: fragments
382 .iter()
383 .fold(Vec::<_>::new(), |mut fragments, fragment| {
384 // Collapse adjacent literal fragments.
385 let Some(last) = fragments.last_mut() else {
386 // If there are no accumulated fragments, add this one and return.
387 fragments.push((*fragment).clone());
388 return fragments;
389 };
390 if let Some(fragment) = last.try_append(fragment) {
391 // If the fragments were able to append, no more work to do
392 fragments.push((*fragment).clone());
393 };
394 fragments
395 }),
396 }
397 }
398 }
399
variable_width(input: &str) -> IResult<&str, MinFieldWidth>400 fn variable_width(input: &str) -> IResult<&str, MinFieldWidth> {
401 map(tag("*"), |_| MinFieldWidth::Variable)(input)
402 }
403
fixed_width(input: &str) -> IResult<&str, MinFieldWidth>404 fn fixed_width(input: &str) -> IResult<&str, MinFieldWidth> {
405 map_res(
406 digit1,
407 |value: &str| -> Result<MinFieldWidth, std::num::ParseIntError> {
408 Ok(MinFieldWidth::Fixed(value.parse()?))
409 },
410 )(input)
411 }
412
no_width(input: &str) -> IResult<&str, MinFieldWidth>413 fn no_width(input: &str) -> IResult<&str, MinFieldWidth> {
414 Ok((input, MinFieldWidth::None))
415 }
416
width(input: &str) -> IResult<&str, MinFieldWidth>417 fn width(input: &str) -> IResult<&str, MinFieldWidth> {
418 alt((variable_width, fixed_width, no_width))(input)
419 }
420
variable_precision(input: &str) -> IResult<&str, Precision>421 fn variable_precision(input: &str) -> IResult<&str, Precision> {
422 let (input, _) = tag(".")(input)?;
423 map(tag("*"), |_| Precision::Variable)(input)
424 }
425
fixed_precision(input: &str) -> IResult<&str, Precision>426 fn fixed_precision(input: &str) -> IResult<&str, Precision> {
427 let (input, _) = tag(".")(input)?;
428 map_res(
429 digit1,
430 |value: &str| -> Result<Precision, std::num::ParseIntError> {
431 Ok(Precision::Fixed(value.parse()?))
432 },
433 )(input)
434 }
435
no_precision(input: &str) -> IResult<&str, Precision>436 fn no_precision(input: &str) -> IResult<&str, Precision> {
437 Ok((input, Precision::None))
438 }
439
precision(input: &str) -> IResult<&str, Precision>440 fn precision(input: &str) -> IResult<&str, Precision> {
441 alt((variable_precision, fixed_precision, no_precision))(input)
442 }
443
444 #[cfg(test)]
445 mod tests;
446