xref: /aosp_15_r20/external/pigweed/pw_tokenizer/public/pw_tokenizer/internal/decode.h (revision 61c4878ac05f98d0ceed94b57d316916de578985)
1 // Copyright 2020 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14 
15 // decode.h defines classes that implement tokenized string decoding. These
16 // classes should not be used directly; instead decode tokenized messages with
17 // the Detokenizer class, defined in pw_tokenizer/detokenize.h.
18 #pragma once
19 
#include <array>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
27 
28 #include "pw_preprocessor/compiler.h"
29 #include "pw_span/span.h"
30 
// Decoding errors are marked with a prefix and suffix so that they stand out
// from the rest of the decoded strings. These macros are used to build decoding
// error strings.
34 #define PW_TOKENIZER_ARG_DECODING_ERROR_PREFIX "<["
35 #define PW_TOKENIZER_ARG_DECODING_ERROR_SUFFIX "]>"
36 #define PW_TOKENIZER_ARG_DECODING_ERROR(message) \
37   PW_TOKENIZER_ARG_DECODING_ERROR_PREFIX message \
38       PW_TOKENIZER_ARG_DECODING_ERROR_SUFFIX
39 
40 namespace pw::tokenizer {
41 
// Describes how decoding went for one argument of an encoded tokenized string.
// The numeric values must stay in sync with decode.py's DecodedArg class.
class ArgStatus {
 public:
  // Issues that can arise while decoding a tokenized string argument. Every
  // non-zero value occupies its own bit, so a single ArgStatus can record
  // several issues at once.
  enum Code : unsigned {
    kOk = 0u,                // Decoding was successful.
    kMissing = 1u << 0,      // The argument was not present in the data.
    kTruncated = 1u << 1,    // The argument was truncated during encoding.
    kDecodeError = 1u << 2,  // An error occurred while decoding the argument.
    kSkipped = 1u << 3,      // Argument was skipped due to a previous error.
  };

  // Implicitly constructible from a Code; a default-constructed status is kOk.
  constexpr ArgStatus(Code code = kOk) : status_(code) {}

  // Merges the bits of `status` into this status. Bits are never cleared.
  constexpr void Update(ArgStatus status) {
    status_ = status_ | status.status_;
  }

  // True if no decoding errors occurred. Truncation alone still counts as OK,
  // since encoding and decoding both succeed when a string is truncated; any
  // other bit makes the status not OK.
  constexpr bool ok() const {
    return (status_ & ~static_cast<unsigned>(kTruncated)) == 0u;
  }

  // True if any bit of `code` is set in this status.
  constexpr bool HasError(Code code) const {
    return (status_ & static_cast<unsigned>(code)) != 0u;
  }

 private:
  // Held as a plain unsigned rather than a Code because several Code bits may
  // be set at the same time.
  unsigned status_;
};
74 
75 // An argument decoded from an encoded tokenized message.
76 class DecodedArg {
77  public:
78   // Constructs a DecodedArg from a decoded value. The value is formatted into a
79   // string using the provided format string. The number of bytes that were
80   // decoded to get the value are provided in raw_size_bytes.
81   template <typename ArgumentType>
82   static DecodedArg FromValue(const char* format_string,
83                               ArgumentType value,
84                               size_t raw_size_bytes,
85                               ArgStatus arg_status = ArgStatus::kOk);
86 
87   // Constructs a DecodedArg that represents a string literal in the format
88   // string (plain text or % character).
DecodedArg(const std::string & literal)89   DecodedArg(const std::string& literal)
90       : value_(literal), raw_data_size_bytes_(0) {}
91 
92   // Constructs a DecodedArg that encountered an error during decoding.
93   DecodedArg(ArgStatus error,
94              std::string_view spec,
95              size_t raw_size_bytes = 0u,
96              std::string_view value = {});
97 
98   // This argument's value as a string. If an error occurred while decoding this
99   // argument, value() will be an error message.
value()100   const std::string& value() const { return value_; }
101 
102   // Returns the conversion specification for this argument (e.g. %02x). This is
103   // empty for literals or "%%".
spec()104   const std::string& spec() const { return spec_; }
105 
106   // True if this argument decoded successfully.
ok()107   bool ok() const { return status_.ok(); }
108 
109   // How many bytes this arg occupied in the encoded arguments.
raw_size_bytes()110   size_t raw_size_bytes() const { return raw_data_size_bytes_; }
111 
112  private:
DecodedArg(const char * format,size_t raw_size_bytes,ArgStatus status)113   DecodedArg(const char* format, size_t raw_size_bytes, ArgStatus status)
114       : spec_(format), raw_data_size_bytes_(raw_size_bytes), status_(status) {}
115 
116   std::string value_;
117   std::string spec_;
118   size_t raw_data_size_bytes_;
119   ArgStatus status_;
120 };
121 
122 // Represents a segment of a printf-style format string. Each StringSegment
123 // contains either literal text or a format specifier.
124 class StringSegment {
125  public:
126   // Parses a format specifier from the text and returns a StringSegment that
127   // represents it. Returns an empty StringSegment if no valid format specifier
128   // was found.
129   static StringSegment ParseFormatSpec(const char* format);
130 
131   // Creates a StringSegment that represents a piece of plain text.
StringSegment(std::string_view text)132   StringSegment(std::string_view text) : StringSegment(text, kLiteral) {}
133 
134   // Returns the DecodedArg with this StringSegment decoded according to the
135   // provided arguments.
136   DecodedArg Decode(const span<const uint8_t>& arguments) const;
137 
138   // Skips decoding this StringSegment. Literals and %% are expanded as normal.
139   DecodedArg Skip() const;
140 
empty()141   bool empty() const { return text_.empty(); }
142 
text()143   const std::string& text() const { return text_; }
144 
145  private:
146   enum Type {
147     kLiteral,
148     kPercent,  // %% format specifier
149     kString,
150     kSignedInt,
151     kUnsigned32,
152     kUnsigned64,
153     kFloatingPoint,
154   };
155 
156   // Varargs-promoted size of args on this machine; only needed for ints or %p.
157   enum ArgSize : bool { k32Bit, k64Bit };
158 
159   template <typename T>
VarargSize()160   static constexpr ArgSize VarargSize() {
161     return sizeof(T) == sizeof(int64_t) ? k64Bit : k32Bit;
162   }
163 
164   static ArgSize VarargSize(std::array<char, 2> length, char spec);
165 
StringSegment()166   StringSegment() : type_(kLiteral) {}
167 
StringSegment(std::string_view text,Type type)168   StringSegment(std::string_view text, Type type)
169       : StringSegment(text, type, VarargSize<void*>()) {}
170 
StringSegment(std::string_view text,Type type,ArgSize local_size)171   StringSegment(std::string_view text, Type type, ArgSize local_size)
172       : text_(text), type_(type), local_size_(local_size) {}
173 
174   DecodedArg DecodeString(const span<const uint8_t>& arguments) const;
175 
176   DecodedArg DecodeInteger(const span<const uint8_t>& arguments) const;
177 
178   DecodedArg DecodeFloatingPoint(const span<const uint8_t>& arguments) const;
179 
180   std::string text_;
181   Type type_;
182   ArgSize local_size_;  // Arg size to use for snprintf on this machine.
183 };
184 
185 // The result of decoding a tokenized message with a FormatString. Stores
186 // decoded arguments and whether there was any undecoded data. This is returned
187 // from a FormatString::Format call.
188 class DecodedFormatString {
189  public:
DecodedFormatString(std::vector<DecodedArg> && segments,size_t remaining_bytes)190   DecodedFormatString(std::vector<DecodedArg>&& segments,
191                       size_t remaining_bytes)
192       : segments_(std::move(segments)), remaining_bytes_(remaining_bytes) {}
193 
194   DecodedFormatString(const DecodedFormatString&) = default;
195   DecodedFormatString(DecodedFormatString&&) = default;
196 
197   DecodedFormatString& operator=(const DecodedFormatString&) = default;
198   DecodedFormatString& operator=(DecodedFormatString&&) = default;
199 
200   // Returns the decoded format string. If any argument decoding errors
201   // occurred, the % conversion specifiers are included unmodified.
202   std::string value() const;
203 
204   // Returns the decoded format string, with error messages for any arguments
205   // that failed to decode.
206   std::string value_with_errors() const;
207 
ok()208   bool ok() const { return remaining_bytes() == 0u && decoding_errors() == 0u; }
209 
210   // Returns the number of bytes that remained after decoding.
remaining_bytes()211   size_t remaining_bytes() const { return remaining_bytes_; }
212 
213   // Returns the number of arguments in the format string. %% is not included.
214   size_t argument_count() const;
215 
216   // Returns the number of arguments that failed to decode.
217   size_t decoding_errors() const;
218 
219  private:
220   std::vector<DecodedArg> segments_;
221   size_t remaining_bytes_;
222 };
223 
224 // Represents a printf-style format string. The string is stored as a vector of
225 // StringSegments.
226 class FormatString {
227  public:
228   // Constructs a FormatString from a null-terminated format string.
229   FormatString(const char* format_string);
230 
231   // Formats this format string according to the provided encoded arguments and
232   // returns a string.
233   DecodedFormatString Format(span<const uint8_t> arguments) const;
234 
Format(std::string_view arguments)235   DecodedFormatString Format(std::string_view arguments) const {
236     return Format(span(reinterpret_cast<const uint8_t*>(arguments.data()),
237                        arguments.size()));
238   }
239 
240  private:
241   std::vector<StringSegment> segments_;
242 };
243 
244 PW_MODIFY_DIAGNOSTICS_PUSH();
245 PW_MODIFY_DIAGNOSTIC(ignored, "-Wformat-nonliteral");
246 // Implementation of DecodedArg::FromValue template function.
247 template <typename ArgumentType>
FromValue(const char * format,ArgumentType value,size_t raw_size_bytes,ArgStatus status)248 DecodedArg DecodedArg::FromValue(const char* format,
249                                  ArgumentType value,
250                                  size_t raw_size_bytes,
251                                  ArgStatus status) {
252   DecodedArg arg(format, raw_size_bytes, status);
253   const int value_size = std::snprintf(nullptr, 0u, format, value);
254 
255   if (value_size < 0) {
256     arg.status_.Update(ArgStatus::kDecodeError);
257     return arg;
258   }
259 
260   // Reserve space in the value string for the snprintf call.
261   arg.value_.append(value_size + 1, '\0');
262 
263   // Print the value to the string in the reserved space, then pop off the \0.
264   std::snprintf(arg.value_.data(), arg.value_.size(), format, value);
265   arg.value_.pop_back();  // Remove the trailing \0.
266 
267   return arg;
268 }
269 PW_MODIFY_DIAGNOSTICS_POP();
270 
271 }  // namespace pw::tokenizer
272