xref: /aosp_15_r20/external/angle/third_party/abseil-cpp/absl/strings/str_split.h (revision 8975f5c5ed3d1c378011245431ada316dfb6f244)
1 //
2 // Copyright 2017 The Abseil Authors.
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 //
8 //      https://www.apache.org/licenses/LICENSE-2.0
9 //
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
16 // -----------------------------------------------------------------------------
17 // File: str_split.h
18 // -----------------------------------------------------------------------------
19 //
20 // This file contains functions for splitting strings. It defines the main
21 // `StrSplit()` function, several delimiters for determining the boundaries on
22 // which to split the string, and predicates for filtering delimited results.
23 // `StrSplit()` adapts the returned collection to the type specified by the
24 // caller.
25 //
26 // Example:
27 //
28 //   // Splits the given string on commas. Returns the results in a
29 //   // vector of strings.
30 //   std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
31 //   // Can also use ","
32 //   // v[0] == "a", v[1] == "b", v[2] == "c"
33 //
34 // See StrSplit() below for more information.
35 #ifndef ABSL_STRINGS_STR_SPLIT_H_
36 #define ABSL_STRINGS_STR_SPLIT_H_
37 
38 #include <algorithm>
39 #include <cstddef>
40 #include <map>
41 #include <set>
42 #include <string>
43 #include <utility>
44 #include <vector>
45 
46 #include "absl/base/internal/raw_logging.h"
47 #include "absl/base/macros.h"
48 #include "absl/strings/internal/str_split_internal.h"
49 #include "absl/strings/string_view.h"
50 #include "absl/strings/strip.h"
51 
52 namespace absl {
53 ABSL_NAMESPACE_BEGIN
54 
55 //------------------------------------------------------------------------------
56 // Delimiters
57 //------------------------------------------------------------------------------
58 //
59 // `StrSplit()` uses delimiters to define the boundaries between elements in the
60 // provided input. Several `Delimiter` types are defined below. If a string
61 // (`const char*`, `std::string`, or `absl::string_view`) is passed in place of
62 // an explicit `Delimiter` object, `StrSplit()` treats it the same way as if it
63 // were passed a `ByString` delimiter.
64 //
65 // A `Delimiter` is an object with a `Find()` function that knows how to find
66 // the first occurrence of itself in a given `absl::string_view`.
67 //
68 // The following `Delimiter` types are available for use within `StrSplit()`:
69 //
70 //   - `ByString` (default for string arguments)
71 //   - `ByChar` (default for a char argument)
72 //   - `ByAnyChar`
73 //   - `ByLength`
74 //   - `MaxSplits`
75 //
76 // A Delimiter's `Find()` member function will be passed an input `text` that is
77 // to be split and a position (`pos`) to begin searching for the next delimiter
78 // in `text`. The returned absl::string_view should refer to the next occurrence
79 // (after `pos`) of the represented delimiter; this returned absl::string_view
80 // represents the next location where the input `text` should be broken.
81 //
82 // The returned absl::string_view may be zero-length if the Delimiter does not
83 // represent a part of the string (e.g., a fixed-length delimiter). If no
84 // delimiter is found in the input `text`, a zero-length absl::string_view
85 // referring to `text.end()` should be returned (e.g.,
86 // `text.substr(text.size())`). It is important that the returned
87 // absl::string_view always be within the bounds of the input `text` given as an
88 // argument--it must not refer to a string that is physically located outside of
89 // the given string.
90 //
91 // The following example is a simple Delimiter object that is created with a
92 // single char and will look for that char in the text passed to the `Find()`
93 // function:
94 //
95 //   struct SimpleDelimiter {
96 //     const char c_;
97 //     explicit SimpleDelimiter(char c) : c_(c) {}
98 //     absl::string_view Find(absl::string_view text, size_t pos) {
99 //       auto found = text.find(c_, pos);
100 //       if (found == absl::string_view::npos)
101 //         return text.substr(text.size());
102 //
103 //       return text.substr(found, 1);
104 //     }
105 //   };
106 
107 // ByString
108 //
109 // A sub-string delimiter. If `StrSplit()` is passed a string in place of a
110 // `Delimiter` object, the string will be implicitly converted into a
111 // `ByString` delimiter.
112 //
113 // Example:
114 //
115 //   // Because a string literal is converted to an `absl::ByString`,
116 //   // the following two splits are equivalent.
117 //
118 //   std::vector<std::string> v1 = absl::StrSplit("a, b, c", ", ");
119 //
120 //   using absl::ByString;
121 //   std::vector<std::string> v2 = absl::StrSplit("a, b, c",
122 //                                                ByString(", "));
123 //   // v[0] == "a", v[1] == "b", v[2] == "c"
124 class ByString {
125  public:
126   explicit ByString(absl::string_view sp);
127   absl::string_view Find(absl::string_view text, size_t pos) const;
128 
129  private:
130   const std::string delimiter_;
131 };
132 
133 // ByAsciiWhitespace
134 //
135 // A sub-string delimiter that splits by ASCII whitespace
136 // (space, tab, vertical tab, formfeed, linefeed, or carriage return).
137 // Note: you probably want to use absl::SkipEmpty() as well!
138 //
139 // This class is equivalent to ByAnyChar with ASCII whitespace chars.
140 //
141 // Example:
142 //
143 //   std::vector<std::string> v = absl::StrSplit(
144 //       "a b\tc\n  d  \n", absl::ByAsciiWhitespace(), absl::SkipEmpty());
145 //   // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d"
class ByAsciiWhitespace {
 public:
  // Delimiter hook: given the input `text` and a starting offset `pos`,
  // returns a view of the next delimiter occurrence in `text`. The class has
  // no data members, so the method is `const` and instances are
  // interchangeable.
  absl::string_view Find(absl::string_view text, size_t pos) const;
};
150 
151 // ByChar
152 //
153 // A single character delimiter. `ByChar` is functionally equivalent to a
154 // 1-char string within a `ByString` delimiter, but slightly more efficient.
155 //
156 // Example:
157 //
//   // Because a char literal is converted to an absl::ByChar,
159 //   // the following two splits are equivalent.
160 //   std::vector<std::string> v1 = absl::StrSplit("a,b,c", ',');
161 //   using absl::ByChar;
162 //   std::vector<std::string> v2 = absl::StrSplit("a,b,c", ByChar(','));
163 //   // v[0] == "a", v[1] == "b", v[2] == "c"
164 //
165 // `ByChar` is also the default delimiter if a single character is given
166 // as the delimiter to `StrSplit()`. For example, the following calls are
167 // equivalent:
168 //
169 //   std::vector<std::string> v = absl::StrSplit("a-b", '-');
170 //
171 //   using absl::ByChar;
172 //   std::vector<std::string> v = absl::StrSplit("a-b", ByChar('-'));
173 //
174 class ByChar {
175  public:
ByChar(char c)176   explicit ByChar(char c) : c_(c) {}
177   absl::string_view Find(absl::string_view text, size_t pos) const;
178 
179  private:
180   char c_;
181 };
182 
183 // ByAnyChar
184 //
185 // A delimiter that will match any of the given byte-sized characters within
186 // its provided string.
187 //
188 // Note: this delimiter works with single-byte string data, but does not work
189 // with variable-width encodings, such as UTF-8.
190 //
191 // Example:
192 //
193 //   using absl::ByAnyChar;
194 //   std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
195 //   // v[0] == "a", v[1] == "b", v[2] == "c"
196 //
// If `ByAnyChar` is given the empty string, it behaves exactly like a
// `ByString` constructed from the empty string: it matches each individual
// character in the input string.
199 //
200 class ByAnyChar {
201  public:
202   explicit ByAnyChar(absl::string_view sp);
203   absl::string_view Find(absl::string_view text, size_t pos) const;
204 
205  private:
206   const std::string delimiters_;
207 };
208 
209 // ByLength
210 //
211 // A delimiter for splitting into equal-length strings. The length argument to
212 // the constructor must be greater than 0.
213 //
214 // Note: this delimiter works with single-byte string data, but does not work
215 // with variable-width encodings, such as UTF-8.
216 //
217 // Example:
218 //
219 //   using absl::ByLength;
220 //   std::vector<std::string> v = absl::StrSplit("123456789", ByLength(3));
//
222 //   // v[0] == "123", v[1] == "456", v[2] == "789"
223 //
224 // Note that the string does not have to be a multiple of the fixed split
225 // length. In such a case, the last substring will be shorter.
226 //
227 //   using absl::ByLength;
228 //   std::vector<std::string> v = absl::StrSplit("12345", ByLength(2));
229 //
230 //   // v[0] == "12", v[1] == "34", v[2] == "5"
class ByLength {
 public:
  // Creates a delimiter that breaks the input into pieces of `length`
  // characters. `length` must be greater than 0. The constructor is
  // `explicit`, so an integer never converts to a `ByLength` implicitly.
  explicit ByLength(ptrdiff_t length);

  // Delimiter hook: given `text` and a starting offset `pos`, returns a view
  // of the next place where `text` should be broken. Because this delimiter
  // does not represent characters of the input, the returned view may be
  // zero-length (see the Delimiter contract at the top of this section).
  absl::string_view Find(absl::string_view text, size_t pos) const;

 private:
  // Piece length supplied at construction; `const` because it never changes.
  const ptrdiff_t length_;
};
239 
240 namespace strings_internal {
241 
// A traits-like metafunction that selects the Delimiter object type to use
// for a given delimiter argument type. The base case simply exposes the
// argument type itself as `type`. The specializations below map a plain
// `char` (e.g., ',') to the `ByChar` delimiter and string-like types
// (e.g., ",") to the `ByString` delimiter. This allows functions like
// absl::StrSplit() and absl::MaxSplits() to accept characters and strings
// directly as delimiter arguments.
template <typename Delimiter>
struct SelectDelimiter {
  using type = Delimiter;
};

// A single character selects `ByChar`, which is documented above as slightly
// more efficient than a one-character `ByString`.
template <>
struct SelectDelimiter<char> {
  using type = ByChar;
};
// C strings, mutable or const, select `ByString`.
template <>
struct SelectDelimiter<char*> {
  using type = ByString;
};
template <>
struct SelectDelimiter<const char*> {
  using type = ByString;
};
// String views select `ByString`.
template <>
struct SelectDelimiter<absl::string_view> {
  using type = ByString;
};
// Owning strings select `ByString`.
template <>
struct SelectDelimiter<std::string> {
  using type = ByString;
};
274 
275 // Wraps another delimiter and sets a max number of matches for that delimiter.
276 template <typename Delimiter>
277 class MaxSplitsImpl {
278  public:
279   MaxSplitsImpl(Delimiter delimiter, int limit)
280       : delimiter_(delimiter), limit_(limit), count_(0) {}
281   absl::string_view Find(absl::string_view text, size_t pos) {
282     if (count_++ == limit_) {
283       return absl::string_view(text.data() + text.size(),
284                                0);  // No more matches.
285     }
286     return delimiter_.Find(text, pos);
287   }
288 
289  private:
290   Delimiter delimiter_;
291   const int limit_;
292   int count_;
293 };
294 
295 }  // namespace strings_internal
296 
297 // MaxSplits()
298 //
299 // A delimiter that limits the number of matches which can occur to the passed
300 // `limit`. The last element in the returned collection will contain all
301 // remaining unsplit pieces, which may contain instances of the delimiter.
302 // The collection will contain at most `limit` + 1 elements.
303 // Example:
304 //
305 //   using absl::MaxSplits;
306 //   std::vector<std::string> v = absl::StrSplit("a,b,c", MaxSplits(',', 1));
307 //
308 //   // v[0] == "a", v[1] == "b,c"
309 template <typename Delimiter>
310 inline strings_internal::MaxSplitsImpl<
311     typename strings_internal::SelectDelimiter<Delimiter>::type>
312 MaxSplits(Delimiter delimiter, int limit) {
313   typedef
314       typename strings_internal::SelectDelimiter<Delimiter>::type DelimiterType;
315   return strings_internal::MaxSplitsImpl<DelimiterType>(
316       DelimiterType(delimiter), limit);
317 }
318 
319 //------------------------------------------------------------------------------
320 // Predicates
321 //------------------------------------------------------------------------------
322 //
323 // Predicates filter the results of a `StrSplit()` by determining whether or not
324 // a resultant element is included in the result set. A predicate may be passed
325 // as an optional third argument to the `StrSplit()` function.
326 //
327 // Predicates are unary functions (or functors) that take a single
328 // `absl::string_view` argument and return a bool indicating whether the
329 // argument should be included (`true`) or excluded (`false`).
330 //
331 // Predicates are useful when filtering out empty substrings. By default, empty
332 // substrings may be returned by `StrSplit()`, which is similar to the way split
333 // functions work in other programming languages.
334 
335 // AllowEmpty()
336 //
337 // Always returns `true`, indicating that all strings--including empty
338 // strings--should be included in the split output. This predicate is not
339 // strictly needed because this is the default behavior of `StrSplit()`;
340 // however, it might be useful at some call sites to make the intent explicit.
341 //
342 // Example:
343 //
344 //  std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', AllowEmpty());
345 //
//  // v[0] == " a ", v[1] == " ", v[2] == "", v[3] == "b", v[4] == ""
347 struct AllowEmpty {
348   bool operator()(absl::string_view) const { return true; }
349 };
350 
351 // SkipEmpty()
352 //
353 // Returns `false` if the given `absl::string_view` is empty, indicating that
354 // `StrSplit()` should omit the empty string.
355 //
356 // Example:
357 //
358 //   std::vector<std::string> v = absl::StrSplit(",a,,b,", ',', SkipEmpty());
359 //
360 //   // v[0] == "a", v[1] == "b"
361 //
362 // Note: `SkipEmpty()` does not consider a string containing only whitespace
363 // to be empty. To skip such whitespace as well, use the `SkipWhitespace()`
364 // predicate.
365 struct SkipEmpty {
366   bool operator()(absl::string_view sp) const { return !sp.empty(); }
367 };
368 
369 // SkipWhitespace()
370 //
371 // Returns `false` if the given `absl::string_view` is empty *or* contains only
372 // whitespace, indicating that `StrSplit()` should omit the string.
373 //
374 // Example:
375 //
376 //   std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
377 //                                               ',', SkipWhitespace());
378 //   // v[0] == " a ", v[1] == "b"
379 //
380 //   // SkipEmpty() would return whitespace elements
381 //   std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', SkipEmpty());
382 //   // v[0] == " a ", v[1] == " ", v[2] == "b"
383 struct SkipWhitespace {
384   bool operator()(absl::string_view sp) const {
385     sp = absl::StripAsciiWhitespace(sp);
386     return !sp.empty();
387   }
388 };
389 
// SFINAE helper: names the type `int` when `T` is exactly `std::string` or
// `const std::string`, and is ill-formed for every other `T` (including
// reference types). Used as a defaulted non-type template parameter to
// enable the `StrSplit()` overloads that take the input text as a string
// object.
template <typename T>
using EnableSplitIfString = typename std::enable_if<
    std::is_same<typename std::remove_const<T>::type, std::string>::value,
    int>::type;
395 
396 //------------------------------------------------------------------------------
397 //                                  StrSplit()
398 //------------------------------------------------------------------------------
399 
400 // StrSplit()
401 //
402 // Splits a given string based on the provided `Delimiter` object, returning the
403 // elements within the type specified by the caller. Optionally, you may pass a
404 // `Predicate` to `StrSplit()` indicating whether to include or exclude the
405 // resulting element within the final result set. (See the overviews for
406 // Delimiters and Predicates above.)
407 //
408 // Example:
409 //
410 //   std::vector<std::string> v = absl::StrSplit("a,b,c,d", ',');
411 //   // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d"
412 //
413 // You can also provide an explicit `Delimiter` object:
414 //
415 // Example:
416 //
417 //   using absl::ByAnyChar;
418 //   std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
419 //   // v[0] == "a", v[1] == "b", v[2] == "c"
420 //
421 // See above for more information on delimiters.
422 //
423 // By default, empty strings are included in the result set. You can optionally
424 // include a third `Predicate` argument to apply a test for whether the
425 // resultant element should be included in the result set:
426 //
427 // Example:
428 //
429 //   std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
430 //                                               ',', SkipWhitespace());
431 //   // v[0] == " a ", v[1] == "b"
432 //
433 // See above for more information on predicates.
434 //
435 //------------------------------------------------------------------------------
436 // StrSplit() Return Types
437 //------------------------------------------------------------------------------
438 //
439 // The `StrSplit()` function adapts the returned collection to the collection
440 // specified by the caller (e.g. `std::vector` above). The returned collections
441 // may contain `std::string`, `absl::string_view` (in which case the original
442 // string being split must ensure that it outlives the collection), or any
443 // object that can be explicitly created from an `absl::string_view`. This
444 // behavior works for:
445 //
// 1) All standard STL containers including `std::vector`, `std::list`,
//    `std::deque`, `std::set`, `std::multiset`, `std::map`, and
//    `std::multimap`.
448 // 2) `std::pair` (which is not actually a container). See below.
449 // 3) `std::array`, which is a container but has different behavior due to its
450 //    fixed size. See below.
451 //
452 // Example:
453 //
454 //   // The results are returned as `absl::string_view` objects. Note that we
455 //   // have to ensure that the input string outlives any results.
456 //   std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
457 //
458 //   // Stores results in a std::set<std::string>, which also performs
459 //   // de-duplication and orders the elements in ascending order.
460 //   std::set<std::string> a = absl::StrSplit("b,a,c,a,b", ',');
//   // a == {"a", "b", "c"}
462 //
463 //   // `StrSplit()` can be used within a range-based for loop, in which case
464 //   // each element will be of type `absl::string_view`.
465 //   std::vector<std::string> v;
466 //   for (const auto sv : absl::StrSplit("a,b,c", ',')) {
467 //     if (sv != "b") v.emplace_back(sv);
468 //   }
469 //   // v[0] == "a", v[1] == "c"
470 //
471 //   // Stores results in a map. The map implementation assumes that the input
472 //   // is provided as a series of key/value pairs. For example, the 0th element
473 //   // resulting from the split will be stored as a key to the 1st element. If
474 //   // an odd number of elements are resolved, the last element is paired with
475 //   // a default-constructed value (e.g., empty string).
476 //   std::map<std::string, std::string> m = absl::StrSplit("a,b,c", ',');
477 //   // m["a"] == "b", m["c"] == ""     // last component value equals ""
478 //
479 // Splitting to `std::pair` is an interesting case because it can hold only two
480 // elements and is not a collection type. When splitting to a `std::pair` the
481 // first two split strings become the `std::pair` `.first` and `.second`
482 // members, respectively. The remaining split substrings are discarded. If there
// are fewer than two split substrings, the empty string is used for the
484 // corresponding `std::pair` member.
485 //
486 // Example:
487 //
488 //   // Stores first two split strings as the members in a std::pair.
489 //   std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
490 //   // p.first == "a", p.second == "b"       // "c" is omitted.
491 //
492 //
493 // Splitting to `std::array` is similar to splitting to `std::pair`, but for
494 // N elements instead of two; missing elements are filled with the empty string
495 // and extra elements are discarded.
496 //
497 // Examples:
498 //
499 //   // Stores first two split strings as the elements in a std::array.
500 //   std::array<std::string, 2> a = absl::StrSplit("a,b,c", ',');
501 //   // a[0] == "a", a[1] == "b"   // "c" is omitted.
502 //
503 //   // The second element is empty.
504 //   std::array<std::string, 2> a = absl::StrSplit("a,", ',');
505 //   // a[0] == "a", a[1] == ""
506 //
507 // The `StrSplit()` function can be used multiple times to perform more
508 // complicated splitting logic, such as intelligently parsing key-value pairs.
509 //
510 // Example:
511 //
512 //   // The input string "a=b=c,d=e,f=,g" becomes
513 //   // { "a" => "b=c", "d" => "e", "f" => "", "g" => "" }
514 //   std::map<std::string, std::string> m;
515 //   for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) {
516 //     m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1)));
517 //   }
518 //   EXPECT_EQ("b=c", m.find("a")->second);
519 //   EXPECT_EQ("e", m.find("d")->second);
520 //   EXPECT_EQ("", m.find("f")->second);
521 //   EXPECT_EQ("", m.find("g")->second);
522 //
523 // WARNING: Due to a legacy bug that is maintained for backward compatibility,
524 // splitting the following empty string_views produces different results:
525 //
526 //   absl::StrSplit(absl::string_view(""), '-');  // {""}
527 //   absl::StrSplit(absl::string_view(), '-');    // {}, but should be {""}
528 //
529 // Try not to depend on this distinction because the bug may one day be fixed.
530 template <typename Delimiter>
531 strings_internal::Splitter<
532     typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty,
533     absl::string_view>
534 StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d) {
535   using DelimiterType =
536       typename strings_internal::SelectDelimiter<Delimiter>::type;
537   return strings_internal::Splitter<DelimiterType, AllowEmpty,
538                                     absl::string_view>(
539       text.value(), DelimiterType(d), AllowEmpty());
540 }
541 
542 template <typename Delimiter, typename StringType,
543           EnableSplitIfString<StringType> = 0>
544 strings_internal::Splitter<
545     typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty,
546     std::string>
547 StrSplit(StringType&& text, Delimiter d) {
548   using DelimiterType =
549       typename strings_internal::SelectDelimiter<Delimiter>::type;
550   return strings_internal::Splitter<DelimiterType, AllowEmpty, std::string>(
551       std::move(text), DelimiterType(d), AllowEmpty());
552 }
553 
554 template <typename Delimiter, typename Predicate>
555 strings_internal::Splitter<
556     typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate,
557     absl::string_view>
558 StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d,
559          Predicate p) {
560   using DelimiterType =
561       typename strings_internal::SelectDelimiter<Delimiter>::type;
562   return strings_internal::Splitter<DelimiterType, Predicate,
563                                     absl::string_view>(
564       text.value(), DelimiterType(std::move(d)), std::move(p));
565 }
566 
567 template <typename Delimiter, typename Predicate, typename StringType,
568           EnableSplitIfString<StringType> = 0>
569 strings_internal::Splitter<
570     typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate,
571     std::string>
572 StrSplit(StringType&& text, Delimiter d, Predicate p) {
573   using DelimiterType =
574       typename strings_internal::SelectDelimiter<Delimiter>::type;
575   return strings_internal::Splitter<DelimiterType, Predicate, std::string>(
576       std::move(text), DelimiterType(d), std::move(p));
577 }
578 
579 ABSL_NAMESPACE_END
580 }  // namespace absl
581 
582 #endif  // ABSL_STRINGS_STR_SPLIT_H_
583