1 // Copyright 2020 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
16 #define ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
17 
#include <limits.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

#include <cassert>
#include <cstdint>
#include <initializer_list>
#include <iosfwd>
#include <iterator>
#include <memory>
#include <string>
#include <utility>
#include <vector>
30 
31 #include "absl/strings/internal/str_format/checker.h"
32 #include "absl/strings/internal/str_format/constexpr_parser.h"
33 #include "absl/strings/internal/str_format/extension.h"
34 
35 namespace absl {
36 ABSL_NAMESPACE_BEGIN
37 namespace str_format_internal {
38 
// Returns a std::string for the length modifier `v`. Defined out of line.
// NOTE(review): presumably the textual spelling of the modifier (such as "ll")
// -- confirm against the definition.
std::string LengthModToString(LengthMod v);

// Out-of-line conversion parser used by ParseFormatString() for every
// conversion that is not a bare "%<char>".
// ParseFormatString() calls it with `p` pointing just past the '%' and `end`
// at the end of the format string, and expects:
//   * nullptr when the conversion cannot be parsed;
//   * otherwise a pointer that is both the position where parsing resumes and
//     the end of the conversion's text.
// `conv` receives the parsed conversion; `next_arg` is the caller's argument
// counter and is passed by pointer so that it can be updated.
// NOTE(review): the exact update rules for `*next_arg` live in the definition;
// ParseFormatString() only relies on it becoming negative once positional
// arguments have been seen.
const char* ConsumeUnboundConversionNoInline(const char* p, const char* end,
                                             UnboundConversion* conv,
                                             int* next_arg);
44 
45 // Parse the format string provided in 'src' and pass the identified items into
46 // 'consumer'.
47 // Text runs will be passed by calling
48 //   Consumer::Append(string_view);
49 // ConversionItems will be passed by calling
50 //   Consumer::ConvertOne(UnboundConversion, string_view);
51 // In the case of ConvertOne, the string_view that is passed is the
52 // portion of the format string corresponding to the conversion, not including
53 // the leading %. On success, it returns true. On failure, it stops and returns
54 // false.
55 template <typename Consumer>
ParseFormatString(string_view src,Consumer consumer)56 bool ParseFormatString(string_view src, Consumer consumer) {
57   int next_arg = 0;
58   const char* p = src.data();
59   const char* const end = p + src.size();
60   while (p != end) {
61     const char* percent =
62         static_cast<const char*>(memchr(p, '%', static_cast<size_t>(end - p)));
63     if (!percent) {
64       // We found the last substring.
65       return consumer.Append(string_view(p, static_cast<size_t>(end - p)));
66     }
67     // We found a percent, so push the text run then process the percent.
68     if (ABSL_PREDICT_FALSE(!consumer.Append(
69             string_view(p, static_cast<size_t>(percent - p))))) {
70       return false;
71     }
72     if (ABSL_PREDICT_FALSE(percent + 1 >= end)) return false;
73 
74     auto tag = GetTagForChar(percent[1]);
75     if (tag.is_conv()) {
76       if (ABSL_PREDICT_FALSE(next_arg < 0)) {
77         // This indicates an error in the format string.
78         // The only way to get `next_arg < 0` here is to have a positional
79         // argument first which sets next_arg to -1 and then a non-positional
80         // argument.
81         return false;
82       }
83       p = percent + 2;
84 
85       // Keep this case separate from the one below.
86       // ConvertOne is more efficient when the compiler can see that the `basic`
87       // flag is set.
88       UnboundConversion conv;
89       conv.conv = tag.as_conv();
90       conv.arg_position = ++next_arg;
91       if (ABSL_PREDICT_FALSE(
92               !consumer.ConvertOne(conv, string_view(percent + 1, 1)))) {
93         return false;
94       }
95     } else if (percent[1] != '%') {
96       UnboundConversion conv;
97       p = ConsumeUnboundConversionNoInline(percent + 1, end, &conv, &next_arg);
98       if (ABSL_PREDICT_FALSE(p == nullptr)) return false;
99       if (ABSL_PREDICT_FALSE(!consumer.ConvertOne(
100               conv, string_view(percent + 1,
101                                 static_cast<size_t>(p - (percent + 1)))))) {
102         return false;
103       }
104     } else {
105       if (ABSL_PREDICT_FALSE(!consumer.Append("%"))) return false;
106       p = percent + 2;
107       continue;
108     }
109   }
110   return true;
111 }
112 
// Evaluates to true for every input. Its only job is to read the first
// character of `s`, so that using it in a constexpr context fails to compile
// when `s` does not point to a constexpr char array.
constexpr bool EnsureConstexpr(string_view s) {
  return s.empty() ? true : (s[0] == s[0]);
}
118 
119 class ParsedFormatBase {
120  public:
121   explicit ParsedFormatBase(
122       string_view format, bool allow_ignored,
123       std::initializer_list<FormatConversionCharSet> convs);
124 
ParsedFormatBase(const ParsedFormatBase & other)125   ParsedFormatBase(const ParsedFormatBase& other) { *this = other; }
126 
ParsedFormatBase(ParsedFormatBase && other)127   ParsedFormatBase(ParsedFormatBase&& other) { *this = std::move(other); }
128 
129   ParsedFormatBase& operator=(const ParsedFormatBase& other) {
130     if (this == &other) return *this;
131     has_error_ = other.has_error_;
132     items_ = other.items_;
133     size_t text_size = items_.empty() ? 0 : items_.back().text_end;
134     data_.reset(new char[text_size]);
135     memcpy(data_.get(), other.data_.get(), text_size);
136     return *this;
137   }
138 
139   ParsedFormatBase& operator=(ParsedFormatBase&& other) {
140     if (this == &other) return *this;
141     has_error_ = other.has_error_;
142     data_ = std::move(other.data_);
143     items_ = std::move(other.items_);
144     // Reset the vector to make sure the invariants hold.
145     other.items_.clear();
146     return *this;
147   }
148 
149   template <typename Consumer>
ProcessFormat(Consumer consumer)150   bool ProcessFormat(Consumer consumer) const {
151     const char* const base = data_.get();
152     string_view text(base, 0);
153     for (const auto& item : items_) {
154       const char* const end = text.data() + text.size();
155       text =
156           string_view(end, static_cast<size_t>((base + item.text_end) - end));
157       if (item.is_conversion) {
158         if (!consumer.ConvertOne(item.conv, text)) return false;
159       } else {
160         if (!consumer.Append(text)) return false;
161       }
162     }
163     return !has_error_;
164   }
165 
has_error()166   bool has_error() const { return has_error_; }
167 
168  private:
169   // Returns whether the conversions match and if !allow_ignored it verifies
170   // that all conversions are used by the format.
171   bool MatchesConversions(
172       bool allow_ignored,
173       std::initializer_list<FormatConversionCharSet> convs) const;
174 
175   struct ParsedFormatConsumer;
176 
177   struct ConversionItem {
178     bool is_conversion;
179     // Points to the past-the-end location of this element in the data_ array.
180     size_t text_end;
181     UnboundConversion conv;
182   };
183 
184   bool has_error_;
185   std::unique_ptr<char[]> data_;
186   std::vector<ConversionItem> items_;
187 };
188 
189 
190 // A value type representing a preparsed format.  These can be created, copied
191 // around, and reused to speed up formatting loops.
192 // The user must specify through the template arguments the conversion
193 // characters used in the format. This will be checked at compile time.
194 //
195 // This class uses Conv enum values to specify each argument.
196 // This allows for more flexibility as you can specify multiple possible
197 // conversion characters for each argument.
198 // ParsedFormat<char...> is a simplified alias for when the user only
199 // needs to specify a single conversion character for each argument.
200 //
201 // Example:
202 //   // Extended format supports multiple characters per argument:
203 //   using MyFormat = ExtendedParsedFormat<Conv::d | Conv::x>;
204 //   MyFormat GetFormat(bool use_hex) {
205 //     if (use_hex) return MyFormat("foo %x bar");
206 //     return MyFormat("foo %d bar");
207 //   }
208 //   // 'format' can be used with any value that supports 'd' and 'x',
209 //   // like `int`.
210 //   auto format = GetFormat(use_hex);
211 //   value = StringF(format, i);
212 //
213 // This class also supports runtime format checking with the ::New() and
214 // ::NewAllowIgnored() factory functions.
215 // This is the only API that allows the user to pass a runtime specified format
216 // string. These factory functions will return NULL if the format does not match
217 // the conversions requested by the user.
218 template <FormatConversionCharSet... C>
219 class ExtendedParsedFormat : public str_format_internal::ParsedFormatBase {
220  public:
ExtendedParsedFormat(string_view format)221   explicit ExtendedParsedFormat(string_view format)
222 #ifdef ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
223       __attribute__((
224           enable_if(str_format_internal::EnsureConstexpr(format),
225                     "Format string is not constexpr."),
226           enable_if(str_format_internal::ValidFormatImpl<C...>(format),
227                     "Format specified does not match the template arguments.")))
228 #endif  // ABSL_INTERNAL_ENABLE_FORMAT_CHECKER
229       : ExtendedParsedFormat(format, false) {
230   }
231 
232   // ExtendedParsedFormat factory function.
233   // The user still has to specify the conversion characters, but they will not
234   // be checked at compile time. Instead, it will be checked at runtime.
235   // This delays the checking to runtime, but allows the user to pass
236   // dynamically sourced formats.
237   // It returns NULL if the format does not match the conversion characters.
238   // The user is responsible for checking the return value before using it.
239   //
240   // The 'New' variant will check that all the specified arguments are being
241   // consumed by the format and return NULL if any argument is being ignored.
242   // The 'NewAllowIgnored' variant will not verify this and will allow formats
243   // that ignore arguments.
New(string_view format)244   static std::unique_ptr<ExtendedParsedFormat> New(string_view format) {
245     return New(format, false);
246   }
NewAllowIgnored(string_view format)247   static std::unique_ptr<ExtendedParsedFormat> NewAllowIgnored(
248       string_view format) {
249     return New(format, true);
250   }
251 
252  private:
New(string_view format,bool allow_ignored)253   static std::unique_ptr<ExtendedParsedFormat> New(string_view format,
254                                                    bool allow_ignored) {
255     std::unique_ptr<ExtendedParsedFormat> conv(
256         new ExtendedParsedFormat(format, allow_ignored));
257     if (conv->has_error()) return nullptr;
258     return conv;
259   }
260 
ExtendedParsedFormat(string_view s,bool allow_ignored)261   ExtendedParsedFormat(string_view s, bool allow_ignored)
262       : ParsedFormatBase(s, allow_ignored, {C...}) {}
263 };
264 }  // namespace str_format_internal
265 ABSL_NAMESPACE_END
266 }  // namespace absl
267 
268 #endif  // ABSL_STRINGS_INTERNAL_STR_FORMAT_PARSER_H_
269