1 // Copyright 2020 The Pigweed Authors
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
4 // use this file except in compliance with the License. You may obtain a copy of
5 // the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
11 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
12 // License for the specific language governing permissions and limitations under
13 // the License.
14
15 #include "pw_tokenizer/internal/decode.h"
16
17 #include <algorithm>
18 #include <array>
19 #include <cctype>
20 #include <cstring>
21 #include <string>
22
23 #include "pw_varint/varint.h"
24
25 namespace pw::tokenizer {
26 namespace {
27
28 // Functions for parsing a printf format specifier.
// Counts the printf flag characters ('-', '+', '#', ' ' and '0') found at the
// start of str and returns how many there are. Scanning stops at the first
// character that is not a flag, which includes the null terminator.
size_t SkipFlags(const char* str) {
  const auto is_flag = [](char c) {
    return c == '-' || c == '+' || c == '#' || c == ' ' || c == '0';
  };

  size_t count = 0;
  while (is_flag(str[count])) {
    ++count;
  }
  return count;
}
37
// Returns the length of a field width or precision at the start of str. This
// is either a single '*' or an optional sign ('-' or '+') followed by zero or
// more decimal digits. Returns 0 if str starts with neither.
size_t SkipAsteriskOrInteger(const char* str) {
  if (str[0] == '*') {
    return 1;
  }

  size_t i = (str[0] == '-' || str[0] == '+') ? 1 : 0;

  // std::isdigit has undefined behavior for negative arguments other than EOF,
  // so convert to unsigned char first; plain char may be signed and format
  // strings may contain bytes >= 0x80.
  while (std::isdigit(static_cast<unsigned char>(str[i])) != 0) {
    i += 1;
  }
  return i;
}
50
// Reads a printf length modifier from the start of str.
//
// Returns {'l', 'l'} or {'h', 'h'} for the two-character modifiers, {c, '\0'}
// for a single-character modifier c (one of h, l, j, z, t, L), and
// {'\0', '\0'} when str does not start with a length modifier.
std::array<char, 2> ReadLengthModifier(const char* str) {
  const char first = str[0];

  // Stop at the end of the string: str[1] must not be read in that case, and
  // std::strchr would otherwise match the terminator of the search string.
  if (first == '\0') {
    return {};
  }

  // Check for ll or hh. str[1] is only read once str[0] is known to be a
  // non-null character, so the read stays within the string.
  if ((first == 'l' || first == 'h') && str[1] == first) {
    return {first, first};
  }
  if (std::strchr("hljztL", first) != nullptr) {
    return {first};
  }
  return {};
}
61
62 // Returns the error message that is used in place of a decoded arg when an
63 // error occurs.
ErrorMessage(ArgStatus status,std::string_view spec,std::string_view value)64 std::string ErrorMessage(ArgStatus status,
65 std::string_view spec,
66 std::string_view value) {
67 const char* message;
68 if (status.HasError(ArgStatus::kSkipped)) {
69 message = "SKIPPED";
70 } else if (status.HasError(ArgStatus::kMissing)) {
71 message = "MISSING";
72 } else if (status.HasError(ArgStatus::kDecodeError)) {
73 message = "ERROR";
74 } else {
75 message = "INTERNAL ERROR";
76 }
77
78 std::string result(PW_TOKENIZER_ARG_DECODING_ERROR_PREFIX);
79 result.append(spec);
80 result.push_back(' ');
81 result.append(message);
82
83 if (!value.empty()) {
84 result.push_back(' ');
85 result.push_back('(');
86 result.append(value);
87 result.push_back(')');
88 }
89
90 result.append(PW_TOKENIZER_ARG_DECODING_ERROR_SUFFIX);
91 return result;
92 }
93
94 } // namespace
95
DecodedArg(ArgStatus error,std::string_view spec,size_t raw_size_bytes,std::string_view value)96 DecodedArg::DecodedArg(ArgStatus error,
97 std::string_view spec,
98 size_t raw_size_bytes,
99 std::string_view value)
100 : value_(ErrorMessage(error, spec, value)),
101 spec_(spec),
102 raw_data_size_bytes_(raw_size_bytes),
103 status_(error) {}
104
ParseFormatSpec(const char * format)105 StringSegment StringSegment::ParseFormatSpec(const char* format) {
106 if (format[0] != '%' || format[1] == '\0') {
107 return StringSegment();
108 }
109
110 // Parse the format specifier.
111 size_t i = 1;
112
113 // Skip the flags.
114 i += SkipFlags(&format[i]);
115
116 // Skip the field width.
117 i += SkipAsteriskOrInteger(&format[i]);
118
119 // Skip the precision.
120 if (format[i] == '.') {
121 i += 1;
122 i += SkipAsteriskOrInteger(&format[i]);
123 }
124
125 // Read the length modifier.
126 const std::array<char, 2> length = ReadLengthModifier(&format[i]);
127 i += (length[0] == '\0' ? 0 : 1) + (length[1] == '\0' ? 0 : 1);
128
129 // Read the conversion specifier.
130 const char spec = format[i];
131
132 Type type;
133 if (spec == 's') {
134 type = kString;
135 } else if (spec == 'c' || spec == 'd' || spec == 'i') {
136 type = kSignedInt;
137 } else if (std::strchr("oxXup", spec) != nullptr) {
138 // The source size matters for unsigned integers because they need to be
139 // masked off to their correct length, since zig-zag decode sign extends.
140 // TODO(hepler): 64-bit targets likely have 64-bit l, j, z, and t. Also, p
141 // needs to be 64-bit on these targets.
142 type = length[0] == 'j' || length[1] == 'l' ? kUnsigned64 : kUnsigned32;
143 } else if (std::strchr("fFeEaAgG", spec) != nullptr) {
144 type = kFloatingPoint;
145 } else if (spec == '%' && i == 1) {
146 type = kPercent;
147 } else {
148 return StringSegment();
149 }
150
151 return {std::string_view(format, i + 1), type, VarargSize(length, spec)};
152 }
153
VarargSize(std::array<char,2> length,char spec)154 StringSegment::ArgSize StringSegment::VarargSize(std::array<char, 2> length,
155 char spec) {
156 // Use pointer size for %p or any other type (for which this doesn't matter).
157 if (std::strchr("cdioxXu", spec) == nullptr) {
158 return VarargSize<void*>();
159 }
160 if (length[0] == 'l') {
161 return length[1] == 'l' ? VarargSize<long long>() : VarargSize<long>();
162 }
163 if (length[0] == 'j') {
164 return VarargSize<intmax_t>();
165 }
166 if (length[0] == 'z') {
167 return VarargSize<size_t>();
168 }
169 if (length[0] == 't') {
170 return VarargSize<ptrdiff_t>();
171 }
172 return VarargSize<int>();
173 }
174
DecodeString(const span<const uint8_t> & arguments) const175 DecodedArg StringSegment::DecodeString(
176 const span<const uint8_t>& arguments) const {
177 if (arguments.empty()) {
178 return DecodedArg(ArgStatus::kMissing, text_);
179 }
180
181 ArgStatus status =
182 (arguments[0] & 0x80u) == 0u ? ArgStatus::kOk : ArgStatus::kTruncated;
183
184 const uint_fast8_t size = arguments[0] & 0x7Fu;
185
186 if (arguments.size() - 1 < size) {
187 status.Update(ArgStatus::kDecodeError);
188 span<const uint8_t> arg_val = arguments.subspan(1);
189 return DecodedArg(
190 status,
191 text_,
192 arguments.size(),
193 {reinterpret_cast<const char*>(arg_val.data()), arg_val.size()});
194 }
195
196 std::string value(reinterpret_cast<const char*>(arguments.data() + 1), size);
197
198 if (status.HasError(ArgStatus::kTruncated)) {
199 value.append("[...]");
200 }
201
202 return DecodedArg::FromValue(text_.c_str(), value.c_str(), 1 + size, status);
203 }
204
DecodeInteger(const span<const uint8_t> & arguments) const205 DecodedArg StringSegment::DecodeInteger(
206 const span<const uint8_t>& arguments) const {
207 if (arguments.empty()) {
208 return DecodedArg(ArgStatus::kMissing, text_);
209 }
210
211 int64_t value;
212 const size_t bytes = varint::Decode(as_bytes(arguments), &value);
213
214 if (bytes == 0u) {
215 return DecodedArg(ArgStatus::kDecodeError,
216 text_,
217 std::min(varint::kMaxVarint64SizeBytes,
218 static_cast<size_t>(arguments.size())));
219 }
220
221 // Unsigned ints need to be masked to their bit width due to sign extension.
222 if (type_ == kUnsigned32) {
223 value &= 0xFFFFFFFFu;
224 }
225
226 if (local_size_ == k32Bit) {
227 return DecodedArg::FromValue(
228 text_.c_str(), static_cast<uint32_t>(value), bytes);
229 }
230 return DecodedArg::FromValue(text_.c_str(), value, bytes);
231 }
232
DecodeFloatingPoint(const span<const uint8_t> & arguments) const233 DecodedArg StringSegment::DecodeFloatingPoint(
234 const span<const uint8_t>& arguments) const {
235 static_assert(sizeof(float) == 4u);
236 if (arguments.size() < sizeof(float)) {
237 return DecodedArg(ArgStatus::kMissing, text_);
238 }
239
240 float value;
241 std::memcpy(&value, arguments.data(), sizeof(value));
242 return DecodedArg::FromValue(text_.c_str(), value, sizeof(value));
243 }
244
Decode(const span<const uint8_t> & arguments) const245 DecodedArg StringSegment::Decode(const span<const uint8_t>& arguments) const {
246 switch (type_) {
247 case kLiteral:
248 return DecodedArg(text_);
249 case kPercent:
250 return DecodedArg("%");
251 case kString:
252 return DecodeString(arguments);
253 case kSignedInt:
254 case kUnsigned32:
255 case kUnsigned64:
256 return DecodeInteger(arguments);
257 case kFloatingPoint:
258 return DecodeFloatingPoint(arguments);
259 }
260
261 return DecodedArg(ArgStatus::kDecodeError, text_);
262 }
263
Skip() const264 DecodedArg StringSegment::Skip() const {
265 switch (type_) {
266 case kLiteral:
267 return DecodedArg(text_);
268 case kPercent:
269 return DecodedArg("%");
270 case kString:
271 case kSignedInt:
272 case kUnsigned32:
273 case kUnsigned64:
274 case kFloatingPoint:
275 default:
276 return DecodedArg(ArgStatus::kSkipped, text_);
277 }
278 }
279
value() const280 std::string DecodedFormatString::value() const {
281 std::string output;
282
283 for (const DecodedArg& arg : segments_) {
284 output.append(arg.ok() ? arg.value() : arg.spec());
285 }
286
287 return output;
288 }
289
value_with_errors() const290 std::string DecodedFormatString::value_with_errors() const {
291 std::string output;
292
293 for (const DecodedArg& arg : segments_) {
294 output.append(arg.value());
295 }
296
297 return output;
298 }
299
argument_count() const300 size_t DecodedFormatString::argument_count() const {
301 return std::count_if(segments_.begin(), segments_.end(), [](const auto& arg) {
302 return !arg.spec().empty();
303 });
304 }
305
decoding_errors() const306 size_t DecodedFormatString::decoding_errors() const {
307 return std::count_if(segments_.begin(), segments_.end(), [](const auto& arg) {
308 return !arg.ok();
309 });
310 }
311
FormatString(const char * format)312 FormatString::FormatString(const char* format) {
313 const char* text_start = format;
314
315 while (format[0] != '\0') {
316 if (StringSegment spec = StringSegment::ParseFormatSpec(format);
317 !spec.empty()) {
318 // Add the text segment seen so far (if any).
319 if (text_start < format) {
320 segments_.emplace_back(
321 std::string_view(text_start, format - text_start));
322 }
323
324 // Move along the index and text segment start.
325 format += spec.text().size();
326 text_start = format;
327
328 // Add the format specifier that was just found.
329 segments_.push_back(std::move(spec));
330 } else {
331 format += 1;
332 }
333 }
334
335 if (text_start < format) {
336 segments_.emplace_back(std::string_view(text_start, format - text_start));
337 }
338 }
339
Format(span<const uint8_t> arguments) const340 DecodedFormatString FormatString::Format(span<const uint8_t> arguments) const {
341 std::vector<DecodedArg> results;
342 bool skip = false;
343
344 for (const auto& segment : segments_) {
345 if (skip) {
346 results.push_back(segment.Skip());
347 } else {
348 results.push_back(segment.Decode(arguments));
349 arguments = arguments.subspan(results.back().raw_size_bytes());
350
351 // If an error occurred, skip decoding the remaining arguments.
352 if (!results.back().ok()) {
353 skip = true;
354 }
355 }
356 }
357
358 return DecodedFormatString(std::move(results), arguments.size());
359 }
360
361 } // namespace pw::tokenizer
362