xref: /aosp_15_r20/external/abseil-cpp/absl/strings/internal/str_format/parser.h (revision 9356374a3709195abf420251b3e825997ff56c0f)
1 // Copyright 2020 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
16 #define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
17 
18 #include <stddef.h>
19 #include <stdlib.h>
20 
21 #include <cassert>
22 #include <cstring>
23 #include <initializer_list>
24 #include <memory>
25 #include <string>
26 #include <utility>
27 #include <vector>
28 
29 #include "absl/base/config.h"
30 #include "absl/base/optimization.h"
31 #include "absl/strings/internal/str_format/checker.h"
32 #include "absl/strings/internal/str_format/constexpr_parser.h"
33 #include "absl/strings/internal/str_format/extension.h"
34 #include "absl/strings/string_view.h"
35 
36 namespace absl {
37 ABSL_NAMESPACE_BEGIN
38 namespace str_format_internal {
39 
40 std::string LengthModToString(LengthMod v);
41 
42 const char* ConsumeUnboundConversionNoInline(const char* p, const char* end,
43                                              UnboundConversion* conv,
44                                              int* next_arg);
45 
46 // Parse the format string provided in 'src' and pass the identified items into
47 // 'consumer'.
48 // Text runs will be passed by calling
49 //   Consumer::Append(string_view);
50 // ConversionItems will be passed by calling
51 //   Consumer::ConvertOne(UnboundConversion, string_view);
52 // In the case of ConvertOne, the string_view that is passed is the
53 // portion of the format string corresponding to the conversion, not including
54 // the leading %. On success, it returns true. On failure, it stops and returns
55 // false.
56 template <typename Consumer>
ParseFormatString(string_view src,Consumer consumer)57 bool ParseFormatString(string_view src, Consumer consumer) {
58   int next_arg = 0;
59   const char* p = src.data();
60   const char* const end = p + src.size();
61   while (p != end) {
62     const char* percent =
63         static_cast<const char*>(memchr(p, '%', static_cast<size_t>(end - p)));
64     if (!percent) {
65       // We found the last substring.
66       return consumer.Append(string_view(p, static_cast<size_t>(end - p)));
67     }
68     // We found a percent, so push the text run then process the percent.
69     if (ABSL_PREDICT_FALSE(!consumer.Append(
70             string_view(p, static_cast<size_t>(percent - p))))) {
71       return false;
72     }
73     if (ABSL_PREDICT_FALSE(percent + 1 >= end)) return false;
74 
75     auto tag = GetTagForChar(percent[1]);
76     if (tag.is_conv()) {
77       if (ABSL_PREDICT_FALSE(next_arg < 0)) {
78         // This indicates an error in the format string.
79         // The only way to get `next_arg < 0` here is to have a positional
80         // argument first which sets next_arg to -1 and then a non-positional
81         // argument.
82         return false;
83       }
84       p = percent + 2;
85 
86       // Keep this case separate from the one below.
87       // ConvertOne is more efficient when the compiler can see that the `basic`
88       // flag is set.
89       UnboundConversion conv;
90       conv.conv = tag.as_conv();
91       conv.arg_position = ++next_arg;
92       if (ABSL_PREDICT_FALSE(
93               !consumer.ConvertOne(conv, string_view(percent + 1, 1)))) {
94         return false;
95       }
96     } else if (percent[1] != '%') {
97       UnboundConversion conv;
98       p = ConsumeUnboundConversionNoInline(percent + 1, end, &conv, &next_arg);
99       if (ABSL_PREDICT_FALSE(p == nullptr)) return false;
100       if (ABSL_PREDICT_FALSE(!consumer.ConvertOne(
101               conv, string_view(percent + 1,
102                                 static_cast<size_t>(p - (percent + 1)))))) {
103         return false;
104       }
105     } else {
106       if (ABSL_PREDICT_FALSE(!consumer.Append("%"))) return false;
107       p = percent + 2;
108       continue;
109     }
110   }
111   return true;
112 }
113 
114 // Always returns true, or fails to compile in a constexpr context if s does not
115 // point to a constexpr char array.
EnsureConstexpr(string_view s)116 constexpr bool EnsureConstexpr(string_view s) {
117   return s.empty() || s[0] == s[0];
118 }
119 
120 class ParsedFormatBase {
121  public:
122   explicit ParsedFormatBase(
123       string_view format, bool allow_ignored,
124       std::initializer_list<FormatConversionCharSet> convs);
125 
ParsedFormatBase(const ParsedFormatBase & other)126   ParsedFormatBase(const ParsedFormatBase& other) { *this = other; }
127 
ParsedFormatBase(ParsedFormatBase && other)128   ParsedFormatBase(ParsedFormatBase&& other) { *this = std::move(other); }
129 
130   ParsedFormatBase& operator=(const ParsedFormatBase& other) {
131     if (this == &other) return *this;
132     has_error_ = other.has_error_;
133     items_ = other.items_;
134     size_t text_size = items_.empty() ? 0 : items_.back().text_end;
135     data_.reset(new char[text_size]);
136     memcpy(data_.get(), other.data_.get(), text_size);
137     return *this;
138   }
139 
140   ParsedFormatBase& operator=(ParsedFormatBase&& other) {
141     if (this == &other) return *this;
142     has_error_ = other.has_error_;
143     data_ = std::move(other.data_);
144     items_ = std::move(other.items_);
145     // Reset the vector to make sure the invariants hold.
146     other.items_.clear();
147     return *this;
148   }
149 
150   template <typename Consumer>
ProcessFormat(Consumer consumer)151   bool ProcessFormat(Consumer consumer) const {
152     const char* const base = data_.get();
153     string_view text(base, 0);
154     for (const auto& item : items_) {
155       const char* const end = text.data() + text.size();
156       text =
157           string_view(end, static_cast<size_t>((base + item.text_end) - end));
158       if (item.is_conversion) {
159         if (!consumer.ConvertOne(item.conv, text)) return false;
160       } else {
161         if (!consumer.Append(text)) return false;
162       }
163     }
164     return !has_error_;
165   }
166 
has_error()167   bool has_error() const { return has_error_; }
168 
169  private:
170   // Returns whether the conversions match and if !allow_ignored it verifies
171   // that all conversions are used by the format.
172   bool MatchesConversions(
173       bool allow_ignored,
174       std::initializer_list<FormatConversionCharSet> convs) const;
175 
176   struct ParsedFormatConsumer;
177 
178   struct ConversionItem {
179     bool is_conversion;
180     // Points to the past-the-end location of this element in the data_ array.
181     size_t text_end;
182     UnboundConversion conv;
183   };
184 
185   bool has_error_;
186   std::unique_ptr<char[]> data_;
187   std::vector<ConversionItem> items_;
188 };
189 
190 
191 // A value type representing a preparsed format.  These can be created, copied
192 // around, and reused to speed up formatting loops.
193 // The user must specify through the template arguments the conversion
194 // characters used in the format. This will be checked at compile time.
195 //
196 // This class uses Conv enum values to specify each argument.
197 // This allows for more flexibility as you can specify multiple possible
198 // conversion characters for each argument.
199 // ParsedFormat<char...> is a simplified alias for when the user only
200 // needs to specify a single conversion character for each argument.
201 //
202 // Example:
203 //   // Extended format supports multiple characters per argument:
204 //   using MyFormat = ExtendedParsedFormat<Conv::d | Conv::x>;
205 //   MyFormat GetFormat(bool use_hex) {
206 //     if (use_hex) return MyFormat("foo %x bar");
207 //     return MyFormat("foo %d bar");
208 //   }
209 //   // 'format' can be used with any value that supports 'd' and 'x',
210 //   // like `int`.
211 //   auto format = GetFormat(use_hex);
212 //   value = StringF(format, i);
213 //
214 // This class also supports runtime format checking with the ::New() and
215 // ::NewAllowIgnored() factory functions.
216 // This is the only API that allows the user to pass a runtime specified format
217 // string. These factory functions will return NULL if the format does not match
218 // the conversions requested by the user.
219 template <FormatConversionCharSet... C>
220 class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase {
221  public:
ExtendedParsedFormat(string_view format)222   explicit ExtendedParsedFormat(string_view format)
223 #ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
224       __attribute__((
225           enable_if(str_format_internal::EnsureConstexpr(format),
226                     "Format string is not constexpr."),
227           enable_if(str_format_internal::ValidFormatImpl<C...>(format),
228                     "Format specified does not match the template arguments.")))
229 #endif  // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
230       : ExtendedParsedFormat(format, false) {
231   }
232 
233   // ExtendedParsedFormat factory function.
234   // The user still has to specify the conversion characters, but they will not
235   // be checked at compile time. Instead, it will be checked at runtime.
236   // This delays the checking to runtime, but allows the user to pass
237   // dynamically sourced formats.
238   // It returns NULL if the format does not match the conversion characters.
239   // The user is responsible for checking the return value before using it.
240   //
241   // The 'New' variant will check that all the specified arguments are being
242   // consumed by the format and return NULL if any argument is being ignored.
243   // The 'NewAllowIgnored' variant will not verify this and will allow formats
244   // that ignore arguments.
New(string_view format)245   static std::unique_ptr<ExtendedParsedFormat> New(string_view format) {
246     return New(format, false);
247   }
NewAllowIgnored(string_view format)248   static std::unique_ptr<ExtendedParsedFormat> NewAllowIgnored(
249       string_view format) {
250     return New(format, true);
251   }
252 
253  private:
New(string_view format,bool allow_ignored)254   static std::unique_ptr<ExtendedParsedFormat> New(string_view format,
255                                                    bool allow_ignored) {
256     std::unique_ptr<ExtendedParsedFormat> conv(
257         new ExtendedParsedFormat(format, allow_ignored));
258     if (conv->has_error()) return nullptr;
259     return conv;
260   }
261 
ExtendedParsedFormat(string_view s,bool allow_ignored)262   ExtendedParsedFormat(string_view s, bool allow_ignored)
263       : ParsedFormatBase(s, allow_ignored, {C...}) {}
264 };
265 }  // namespace str_format_internal
266 ABSL_NAMESPACE_END
267 }  // namespace absl
268 
269 #endif  // ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
270