1 // Copyright 2020 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
16 #define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
17
18 #include <stddef.h>
19 #include <stdlib.h>
20
21 #include <cassert>
22 #include <cstring>
23 #include <initializer_list>
24 #include <memory>
25 #include <string>
26 #include <utility>
27 #include <vector>
28
29 #include "absl/base/config.h"
30 #include "absl/base/optimization.h"
31 #include "absl/strings/internal/str_format/checker.h"
32 #include "absl/strings/internal/str_format/constexpr_parser.h"
33 #include "absl/strings/internal/str_format/extension.h"
34 #include "absl/strings/string_view.h"
35
36 namespace absl {
37 ABSL_NAMESPACE_BEGIN
38 namespace str_format_internal {
39
40 std::string LengthModToString(LengthMod v);
41
42 const char* ConsumeUnboundConversionNoInline(const char* p, const char* end,
43 UnboundConversion* conv,
44 int* next_arg);
45
46 // Parse the format string provided in 'src' and pass the identified items into
47 // 'consumer'.
48 // Text runs will be passed by calling
49 // Consumer::Append(string_view);
50 // ConversionItems will be passed by calling
51 // Consumer::ConvertOne(UnboundConversion, string_view);
52 // In the case of ConvertOne, the string_view that is passed is the
53 // portion of the format string corresponding to the conversion, not including
54 // the leading %. On success, it returns true. On failure, it stops and returns
55 // false.
56 template <typename Consumer>
ParseFormatString(string_view src,Consumer consumer)57 bool ParseFormatString(string_view src, Consumer consumer) {
58 int next_arg = 0;
59 const char* p = src.data();
60 const char* const end = p + src.size();
61 while (p != end) {
62 const char* percent =
63 static_cast<const char*>(memchr(p, '%', static_cast<size_t>(end - p)));
64 if (!percent) {
65 // We found the last substring.
66 return consumer.Append(string_view(p, static_cast<size_t>(end - p)));
67 }
68 // We found a percent, so push the text run then process the percent.
69 if (ABSL_PREDICT_FALSE(!consumer.Append(
70 string_view(p, static_cast<size_t>(percent - p))))) {
71 return false;
72 }
73 if (ABSL_PREDICT_FALSE(percent + 1 >= end)) return false;
74
75 auto tag = GetTagForChar(percent[1]);
76 if (tag.is_conv()) {
77 if (ABSL_PREDICT_FALSE(next_arg < 0)) {
78 // This indicates an error in the format string.
79 // The only way to get `next_arg < 0` here is to have a positional
80 // argument first which sets next_arg to -1 and then a non-positional
81 // argument.
82 return false;
83 }
84 p = percent + 2;
85
86 // Keep this case separate from the one below.
87 // ConvertOne is more efficient when the compiler can see that the `basic`
88 // flag is set.
89 UnboundConversion conv;
90 conv.conv = tag.as_conv();
91 conv.arg_position = ++next_arg;
92 if (ABSL_PREDICT_FALSE(
93 !consumer.ConvertOne(conv, string_view(percent + 1, 1)))) {
94 return false;
95 }
96 } else if (percent[1] != '%') {
97 UnboundConversion conv;
98 p = ConsumeUnboundConversionNoInline(percent + 1, end, &conv, &next_arg);
99 if (ABSL_PREDICT_FALSE(p == nullptr)) return false;
100 if (ABSL_PREDICT_FALSE(!consumer.ConvertOne(
101 conv, string_view(percent + 1,
102 static_cast<size_t>(p - (percent + 1)))))) {
103 return false;
104 }
105 } else {
106 if (ABSL_PREDICT_FALSE(!consumer.Append("%"))) return false;
107 p = percent + 2;
108 continue;
109 }
110 }
111 return true;
112 }
113
114 // Always returns true, or fails to compile in a constexpr context if s does not
115 // point to a constexpr char array.
EnsureConstexpr(string_view s)116 constexpr bool EnsureConstexpr(string_view s) {
117 return s.empty() || s[0] == s[0];
118 }
119
120 class ParsedFormatBase {
121 public:
122 explicit ParsedFormatBase(
123 string_view format, bool allow_ignored,
124 std::initializer_list<FormatConversionCharSet> convs);
125
ParsedFormatBase(const ParsedFormatBase & other)126 ParsedFormatBase(const ParsedFormatBase& other) { *this = other; }
127
ParsedFormatBase(ParsedFormatBase && other)128 ParsedFormatBase(ParsedFormatBase&& other) { *this = std::move(other); }
129
130 ParsedFormatBase& operator=(const ParsedFormatBase& other) {
131 if (this == &other) return *this;
132 has_error_ = other.has_error_;
133 items_ = other.items_;
134 size_t text_size = items_.empty() ? 0 : items_.back().text_end;
135 data_.reset(new char[text_size]);
136 memcpy(data_.get(), other.data_.get(), text_size);
137 return *this;
138 }
139
140 ParsedFormatBase& operator=(ParsedFormatBase&& other) {
141 if (this == &other) return *this;
142 has_error_ = other.has_error_;
143 data_ = std::move(other.data_);
144 items_ = std::move(other.items_);
145 // Reset the vector to make sure the invariants hold.
146 other.items_.clear();
147 return *this;
148 }
149
150 template <typename Consumer>
ProcessFormat(Consumer consumer)151 bool ProcessFormat(Consumer consumer) const {
152 const char* const base = data_.get();
153 string_view text(base, 0);
154 for (const auto& item : items_) {
155 const char* const end = text.data() + text.size();
156 text =
157 string_view(end, static_cast<size_t>((base + item.text_end) - end));
158 if (item.is_conversion) {
159 if (!consumer.ConvertOne(item.conv, text)) return false;
160 } else {
161 if (!consumer.Append(text)) return false;
162 }
163 }
164 return !has_error_;
165 }
166
has_error()167 bool has_error() const { return has_error_; }
168
169 private:
170 // Returns whether the conversions match and if !allow_ignored it verifies
171 // that all conversions are used by the format.
172 bool MatchesConversions(
173 bool allow_ignored,
174 std::initializer_list<FormatConversionCharSet> convs) const;
175
176 struct ParsedFormatConsumer;
177
178 struct ConversionItem {
179 bool is_conversion;
180 // Points to the past-the-end location of this element in the data_ array.
181 size_t text_end;
182 UnboundConversion conv;
183 };
184
185 bool has_error_;
186 std::unique_ptr<char[]> data_;
187 std::vector<ConversionItem> items_;
188 };
189
190
191 // A value type representing a preparsed format. These can be created, copied
192 // around, and reused to speed up formatting loops.
193 // The user must specify through the template arguments the conversion
194 // characters used in the format. This will be checked at compile time.
195 //
196 // This class uses Conv enum values to specify each argument.
197 // This allows for more flexibility as you can specify multiple possible
198 // conversion characters for each argument.
199 // ParsedFormat<char...> is a simplified alias for when the user only
200 // needs to specify a single conversion character for each argument.
201 //
202 // Example:
203 // // Extended format supports multiple characters per argument:
204 // using MyFormat = ExtendedParsedFormat<Conv::d | Conv::x>;
205 // MyFormat GetFormat(bool use_hex) {
206 // if (use_hex) return MyFormat("foo %x bar");
207 // return MyFormat("foo %d bar");
208 // }
209 // // 'format' can be used with any value that supports 'd' and 'x',
210 // // like `int`.
211 // auto format = GetFormat(use_hex);
212 // value = StringF(format, i);
213 //
214 // This class also supports runtime format checking with the ::New() and
215 // ::NewAllowIgnored() factory functions.
216 // This is the only API that allows the user to pass a runtime specified format
217 // string. These factory functions will return NULL if the format does not match
218 // the conversions requested by the user.
219 template <FormatConversionCharSet... C>
220 class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase {
221 public:
ExtendedParsedFormat(string_view format)222 explicit ExtendedParsedFormat(string_view format)
223 #ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
224 __attribute__((
225 enable_if(str_format_internal::EnsureConstexpr(format),
226 "Format string is not constexpr."),
227 enable_if(str_format_internal::ValidFormatImpl<C...>(format),
228 "Format specified does not match the template arguments.")))
229 #endif // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
230 : ExtendedParsedFormat(format, false) {
231 }
232
233 // ExtendedParsedFormat factory function.
234 // The user still has to specify the conversion characters, but they will not
235 // be checked at compile time. Instead, it will be checked at runtime.
236 // This delays the checking to runtime, but allows the user to pass
237 // dynamically sourced formats.
238 // It returns NULL if the format does not match the conversion characters.
239 // The user is responsible for checking the return value before using it.
240 //
241 // The 'New' variant will check that all the specified arguments are being
242 // consumed by the format and return NULL if any argument is being ignored.
243 // The 'NewAllowIgnored' variant will not verify this and will allow formats
244 // that ignore arguments.
New(string_view format)245 static std::unique_ptr<ExtendedParsedFormat> New(string_view format) {
246 return New(format, false);
247 }
NewAllowIgnored(string_view format)248 static std::unique_ptr<ExtendedParsedFormat> NewAllowIgnored(
249 string_view format) {
250 return New(format, true);
251 }
252
253 private:
New(string_view format,bool allow_ignored)254 static std::unique_ptr<ExtendedParsedFormat> New(string_view format,
255 bool allow_ignored) {
256 std::unique_ptr<ExtendedParsedFormat> conv(
257 new ExtendedParsedFormat(format, allow_ignored));
258 if (conv->has_error()) return nullptr;
259 return conv;
260 }
261
ExtendedParsedFormat(string_view s,bool allow_ignored)262 ExtendedParsedFormat(string_view s, bool allow_ignored)
263 : ParsedFormatBase(s, allow_ignored, {C...}) {}
264 };
265 } // namespace str_format_internal
266 ABSL_NAMESPACE_END
267 } // namespace absl
268
269 #endif // ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
270