xref: /aosp_15_r20/external/pigweed/pw_tokenizer/decode.cc (revision 61c4878ac05f98d0ceed94b57d316916de578985)
1 // Copyright 2020 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 //     https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14 
15 #include "pw_tokenizer/internal/decode.h"
16 
17 #include <algorithm>
18 #include <array>
19 #include <cctype>
20 #include <cstring>
21 #include <string>
22 
23 #include "pw_varint/varint.h"
24 
25 namespace pw::tokenizer {
26 namespace {
27 
28 // Functions for parsing a printf format specifier.
// Counts the leading printf flag characters ('-', '+', '#', ' ', '0') in str
// and returns how many were found. Scanning stops at the first character that
// is not a flag, which includes the null terminator.
size_t SkipFlags(const char* str) {
  const char* cursor = str;
  for (;; ++cursor) {
    switch (*cursor) {
      case '-':
      case '+':
      case '#':
      case ' ':
      case '0':
        continue;  // Still inside the flags; advance to the next character.
      default:
        return static_cast<size_t>(cursor - str);
    }
  }
}
37 
// Returns the number of characters occupied by a field width or precision at
// the start of str. This is either a single '*' or an optionally signed run of
// decimal digits. Returns 0 if neither is present. A lone sign with no digits
// after it is still counted as one character.
size_t SkipAsteriskOrInteger(const char* str) {
  if (str[0] == '*') {
    return 1;
  }

  size_t i = (str[0] == '-' || str[0] == '+') ? 1 : 0;

  // std::isdigit requires a value representable as unsigned char (or EOF).
  // Passing a plain char that happens to be negative is undefined behavior,
  // so convert explicitly before classifying.
  while (std::isdigit(static_cast<unsigned char>(str[i])) != 0) {
    i += 1;
  }
  return i;
}
50 
// Reads the printf length modifier at the start of str.
//
// Returns the modifier characters in order, padded with '\0': {'l', 'l'} or
// {'h', 'h'} for the doubled forms, {c, '\0'} for a single modifier out of
// "hljztL", and {'\0', '\0'} when no modifier is present.
std::array<char, 2> ReadLengthModifier(const char* str) {
  const char first = str[0];

  // Stop at the end of the string. This avoids reading str[1] past the
  // terminator, and avoids std::strchr matching its own null terminator.
  if (first == '\0') {
    return {};
  }

  // Check for ll or hh. str[1] is only read once str[0] is known to be a
  // non-null character, so the access stays inside the string.
  if ((first == 'l' || first == 'h') && str[1] == first) {
    return {first, first};
  }
  if (std::strchr("hljztL", first) != nullptr) {
    return {first};
  }
  return {};
}
61 
62 // Returns the error message that is used in place of a decoded arg when an
63 // error occurs.
ErrorMessage(ArgStatus status,std::string_view spec,std::string_view value)64 std::string ErrorMessage(ArgStatus status,
65                          std::string_view spec,
66                          std::string_view value) {
67   const char* message;
68   if (status.HasError(ArgStatus::kSkipped)) {
69     message = "SKIPPED";
70   } else if (status.HasError(ArgStatus::kMissing)) {
71     message = "MISSING";
72   } else if (status.HasError(ArgStatus::kDecodeError)) {
73     message = "ERROR";
74   } else {
75     message = "INTERNAL ERROR";
76   }
77 
78   std::string result(PW_TOKENIZER_ARG_DECODING_ERROR_PREFIX);
79   result.append(spec);
80   result.push_back(' ');
81   result.append(message);
82 
83   if (!value.empty()) {
84     result.push_back(' ');
85     result.push_back('(');
86     result.append(value);
87     result.push_back(')');
88   }
89 
90   result.append(PW_TOKENIZER_ARG_DECODING_ERROR_SUFFIX);
91   return result;
92 }
93 
94 }  // namespace
95 
DecodedArg(ArgStatus error,std::string_view spec,size_t raw_size_bytes,std::string_view value)96 DecodedArg::DecodedArg(ArgStatus error,
97                        std::string_view spec,
98                        size_t raw_size_bytes,
99                        std::string_view value)
100     : value_(ErrorMessage(error, spec, value)),
101       spec_(spec),
102       raw_data_size_bytes_(raw_size_bytes),
103       status_(error) {}
104 
ParseFormatSpec(const char * format)105 StringSegment StringSegment::ParseFormatSpec(const char* format) {
106   if (format[0] != '%' || format[1] == '\0') {
107     return StringSegment();
108   }
109 
110   // Parse the format specifier.
111   size_t i = 1;
112 
113   // Skip the flags.
114   i += SkipFlags(&format[i]);
115 
116   // Skip the field width.
117   i += SkipAsteriskOrInteger(&format[i]);
118 
119   // Skip the precision.
120   if (format[i] == '.') {
121     i += 1;
122     i += SkipAsteriskOrInteger(&format[i]);
123   }
124 
125   // Read the length modifier.
126   const std::array<char, 2> length = ReadLengthModifier(&format[i]);
127   i += (length[0] == '\0' ? 0 : 1) + (length[1] == '\0' ? 0 : 1);
128 
129   // Read the conversion specifier.
130   const char spec = format[i];
131 
132   Type type;
133   if (spec == 's') {
134     type = kString;
135   } else if (spec == 'c' || spec == 'd' || spec == 'i') {
136     type = kSignedInt;
137   } else if (std::strchr("oxXup", spec) != nullptr) {
138     // The source size matters for unsigned integers because they need to be
139     // masked off to their correct length, since zig-zag decode sign extends.
140     // TODO(hepler): 64-bit targets likely have 64-bit l, j, z, and t. Also, p
141     // needs to be 64-bit on these targets.
142     type = length[0] == 'j' || length[1] == 'l' ? kUnsigned64 : kUnsigned32;
143   } else if (std::strchr("fFeEaAgG", spec) != nullptr) {
144     type = kFloatingPoint;
145   } else if (spec == '%' && i == 1) {
146     type = kPercent;
147   } else {
148     return StringSegment();
149   }
150 
151   return {std::string_view(format, i + 1), type, VarargSize(length, spec)};
152 }
153 
VarargSize(std::array<char,2> length,char spec)154 StringSegment::ArgSize StringSegment::VarargSize(std::array<char, 2> length,
155                                                  char spec) {
156   // Use pointer size for %p or any other type (for which this doesn't matter).
157   if (std::strchr("cdioxXu", spec) == nullptr) {
158     return VarargSize<void*>();
159   }
160   if (length[0] == 'l') {
161     return length[1] == 'l' ? VarargSize<long long>() : VarargSize<long>();
162   }
163   if (length[0] == 'j') {
164     return VarargSize<intmax_t>();
165   }
166   if (length[0] == 'z') {
167     return VarargSize<size_t>();
168   }
169   if (length[0] == 't') {
170     return VarargSize<ptrdiff_t>();
171   }
172   return VarargSize<int>();
173 }
174 
DecodeString(const span<const uint8_t> & arguments) const175 DecodedArg StringSegment::DecodeString(
176     const span<const uint8_t>& arguments) const {
177   if (arguments.empty()) {
178     return DecodedArg(ArgStatus::kMissing, text_);
179   }
180 
181   ArgStatus status =
182       (arguments[0] & 0x80u) == 0u ? ArgStatus::kOk : ArgStatus::kTruncated;
183 
184   const uint_fast8_t size = arguments[0] & 0x7Fu;
185 
186   if (arguments.size() - 1 < size) {
187     status.Update(ArgStatus::kDecodeError);
188     span<const uint8_t> arg_val = arguments.subspan(1);
189     return DecodedArg(
190         status,
191         text_,
192         arguments.size(),
193         {reinterpret_cast<const char*>(arg_val.data()), arg_val.size()});
194   }
195 
196   std::string value(reinterpret_cast<const char*>(arguments.data() + 1), size);
197 
198   if (status.HasError(ArgStatus::kTruncated)) {
199     value.append("[...]");
200   }
201 
202   return DecodedArg::FromValue(text_.c_str(), value.c_str(), 1 + size, status);
203 }
204 
DecodeInteger(const span<const uint8_t> & arguments) const205 DecodedArg StringSegment::DecodeInteger(
206     const span<const uint8_t>& arguments) const {
207   if (arguments.empty()) {
208     return DecodedArg(ArgStatus::kMissing, text_);
209   }
210 
211   int64_t value;
212   const size_t bytes = varint::Decode(as_bytes(arguments), &value);
213 
214   if (bytes == 0u) {
215     return DecodedArg(ArgStatus::kDecodeError,
216                       text_,
217                       std::min(varint::kMaxVarint64SizeBytes,
218                                static_cast<size_t>(arguments.size())));
219   }
220 
221   // Unsigned ints need to be masked to their bit width due to sign extension.
222   if (type_ == kUnsigned32) {
223     value &= 0xFFFFFFFFu;
224   }
225 
226   if (local_size_ == k32Bit) {
227     return DecodedArg::FromValue(
228         text_.c_str(), static_cast<uint32_t>(value), bytes);
229   }
230   return DecodedArg::FromValue(text_.c_str(), value, bytes);
231 }
232 
DecodeFloatingPoint(const span<const uint8_t> & arguments) const233 DecodedArg StringSegment::DecodeFloatingPoint(
234     const span<const uint8_t>& arguments) const {
235   static_assert(sizeof(float) == 4u);
236   if (arguments.size() < sizeof(float)) {
237     return DecodedArg(ArgStatus::kMissing, text_);
238   }
239 
240   float value;
241   std::memcpy(&value, arguments.data(), sizeof(value));
242   return DecodedArg::FromValue(text_.c_str(), value, sizeof(value));
243 }
244 
Decode(const span<const uint8_t> & arguments) const245 DecodedArg StringSegment::Decode(const span<const uint8_t>& arguments) const {
246   switch (type_) {
247     case kLiteral:
248       return DecodedArg(text_);
249     case kPercent:
250       return DecodedArg("%");
251     case kString:
252       return DecodeString(arguments);
253     case kSignedInt:
254     case kUnsigned32:
255     case kUnsigned64:
256       return DecodeInteger(arguments);
257     case kFloatingPoint:
258       return DecodeFloatingPoint(arguments);
259   }
260 
261   return DecodedArg(ArgStatus::kDecodeError, text_);
262 }
263 
Skip() const264 DecodedArg StringSegment::Skip() const {
265   switch (type_) {
266     case kLiteral:
267       return DecodedArg(text_);
268     case kPercent:
269       return DecodedArg("%");
270     case kString:
271     case kSignedInt:
272     case kUnsigned32:
273     case kUnsigned64:
274     case kFloatingPoint:
275     default:
276       return DecodedArg(ArgStatus::kSkipped, text_);
277   }
278 }
279 
value() const280 std::string DecodedFormatString::value() const {
281   std::string output;
282 
283   for (const DecodedArg& arg : segments_) {
284     output.append(arg.ok() ? arg.value() : arg.spec());
285   }
286 
287   return output;
288 }
289 
value_with_errors() const290 std::string DecodedFormatString::value_with_errors() const {
291   std::string output;
292 
293   for (const DecodedArg& arg : segments_) {
294     output.append(arg.value());
295   }
296 
297   return output;
298 }
299 
argument_count() const300 size_t DecodedFormatString::argument_count() const {
301   return std::count_if(segments_.begin(), segments_.end(), [](const auto& arg) {
302     return !arg.spec().empty();
303   });
304 }
305 
decoding_errors() const306 size_t DecodedFormatString::decoding_errors() const {
307   return std::count_if(segments_.begin(), segments_.end(), [](const auto& arg) {
308     return !arg.ok();
309   });
310 }
311 
FormatString(const char * format)312 FormatString::FormatString(const char* format) {
313   const char* text_start = format;
314 
315   while (format[0] != '\0') {
316     if (StringSegment spec = StringSegment::ParseFormatSpec(format);
317         !spec.empty()) {
318       // Add the text segment seen so far (if any).
319       if (text_start < format) {
320         segments_.emplace_back(
321             std::string_view(text_start, format - text_start));
322       }
323 
324       // Move along the index and text segment start.
325       format += spec.text().size();
326       text_start = format;
327 
328       // Add the format specifier that was just found.
329       segments_.push_back(std::move(spec));
330     } else {
331       format += 1;
332     }
333   }
334 
335   if (text_start < format) {
336     segments_.emplace_back(std::string_view(text_start, format - text_start));
337   }
338 }
339 
Format(span<const uint8_t> arguments) const340 DecodedFormatString FormatString::Format(span<const uint8_t> arguments) const {
341   std::vector<DecodedArg> results;
342   bool skip = false;
343 
344   for (const auto& segment : segments_) {
345     if (skip) {
346       results.push_back(segment.Skip());
347     } else {
348       results.push_back(segment.Decode(arguments));
349       arguments = arguments.subspan(results.back().raw_size_bytes());
350 
351       // If an error occurred, skip decoding the remaining arguments.
352       if (!results.back().ok()) {
353         skip = true;
354       }
355     }
356   }
357 
358   return DecodedFormatString(std::move(results), arguments.size());
359 }
360 
361 }  // namespace pw::tokenizer
362