xref: /aosp_15_r20/external/abseil-cpp/absl/strings/str_split.h (revision 9356374a3709195abf420251b3e825997ff56c0f)
1*9356374aSAndroid Build Coastguard Worker //
2*9356374aSAndroid Build Coastguard Worker // Copyright 2017 The Abseil Authors.
3*9356374aSAndroid Build Coastguard Worker //
4*9356374aSAndroid Build Coastguard Worker // Licensed under the Apache License, Version 2.0 (the "License");
5*9356374aSAndroid Build Coastguard Worker // you may not use this file except in compliance with the License.
6*9356374aSAndroid Build Coastguard Worker // You may obtain a copy of the License at
7*9356374aSAndroid Build Coastguard Worker //
8*9356374aSAndroid Build Coastguard Worker //      https://www.apache.org/licenses/LICENSE-2.0
9*9356374aSAndroid Build Coastguard Worker //
10*9356374aSAndroid Build Coastguard Worker // Unless required by applicable law or agreed to in writing, software
11*9356374aSAndroid Build Coastguard Worker // distributed under the License is distributed on an "AS IS" BASIS,
12*9356374aSAndroid Build Coastguard Worker // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13*9356374aSAndroid Build Coastguard Worker // See the License for the specific language governing permissions and
14*9356374aSAndroid Build Coastguard Worker // limitations under the License.
15*9356374aSAndroid Build Coastguard Worker //
16*9356374aSAndroid Build Coastguard Worker // -----------------------------------------------------------------------------
17*9356374aSAndroid Build Coastguard Worker // File: str_split.h
18*9356374aSAndroid Build Coastguard Worker // -----------------------------------------------------------------------------
19*9356374aSAndroid Build Coastguard Worker //
20*9356374aSAndroid Build Coastguard Worker // This file contains functions for splitting strings. It defines the main
21*9356374aSAndroid Build Coastguard Worker // `StrSplit()` function, several delimiters for determining the boundaries on
22*9356374aSAndroid Build Coastguard Worker // which to split the string, and predicates for filtering delimited results.
23*9356374aSAndroid Build Coastguard Worker // `StrSplit()` adapts the returned collection to the type specified by the
24*9356374aSAndroid Build Coastguard Worker // caller.
25*9356374aSAndroid Build Coastguard Worker //
26*9356374aSAndroid Build Coastguard Worker // Example:
27*9356374aSAndroid Build Coastguard Worker //
28*9356374aSAndroid Build Coastguard Worker //   // Splits the given string on commas. Returns the results in a
29*9356374aSAndroid Build Coastguard Worker //   // vector of strings.
30*9356374aSAndroid Build Coastguard Worker //   std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
31*9356374aSAndroid Build Coastguard Worker //   // Can also use ","
32*9356374aSAndroid Build Coastguard Worker //   // v[0] == "a", v[1] == "b", v[2] == "c"
33*9356374aSAndroid Build Coastguard Worker //
34*9356374aSAndroid Build Coastguard Worker // See StrSplit() below for more information.
35*9356374aSAndroid Build Coastguard Worker #ifndef ABSL_STRINGS_STR_SPLIT_H_
36*9356374aSAndroid Build Coastguard Worker #define ABSL_STRINGS_STR_SPLIT_H_
37*9356374aSAndroid Build Coastguard Worker 
38*9356374aSAndroid Build Coastguard Worker #include <algorithm>
39*9356374aSAndroid Build Coastguard Worker #include <cstddef>
40*9356374aSAndroid Build Coastguard Worker #include <map>
41*9356374aSAndroid Build Coastguard Worker #include <set>
42*9356374aSAndroid Build Coastguard Worker #include <string>
43*9356374aSAndroid Build Coastguard Worker #include <utility>
44*9356374aSAndroid Build Coastguard Worker #include <vector>
45*9356374aSAndroid Build Coastguard Worker 
46*9356374aSAndroid Build Coastguard Worker #include "absl/base/internal/raw_logging.h"
47*9356374aSAndroid Build Coastguard Worker #include "absl/base/macros.h"
48*9356374aSAndroid Build Coastguard Worker #include "absl/strings/internal/str_split_internal.h"
49*9356374aSAndroid Build Coastguard Worker #include "absl/strings/string_view.h"
50*9356374aSAndroid Build Coastguard Worker #include "absl/strings/strip.h"
51*9356374aSAndroid Build Coastguard Worker 
52*9356374aSAndroid Build Coastguard Worker namespace absl {
53*9356374aSAndroid Build Coastguard Worker ABSL_NAMESPACE_BEGIN
54*9356374aSAndroid Build Coastguard Worker 
55*9356374aSAndroid Build Coastguard Worker //------------------------------------------------------------------------------
56*9356374aSAndroid Build Coastguard Worker // Delimiters
57*9356374aSAndroid Build Coastguard Worker //------------------------------------------------------------------------------
58*9356374aSAndroid Build Coastguard Worker //
59*9356374aSAndroid Build Coastguard Worker // `StrSplit()` uses delimiters to define the boundaries between elements in the
60*9356374aSAndroid Build Coastguard Worker // provided input. Several `Delimiter` types are defined below. If a string
61*9356374aSAndroid Build Coastguard Worker // (`const char*`, `std::string`, or `absl::string_view`) is passed in place of
62*9356374aSAndroid Build Coastguard Worker // an explicit `Delimiter` object, `StrSplit()` treats it the same way as if it
63*9356374aSAndroid Build Coastguard Worker // were passed a `ByString` delimiter.
64*9356374aSAndroid Build Coastguard Worker //
65*9356374aSAndroid Build Coastguard Worker // A `Delimiter` is an object with a `Find()` function that knows how to find
66*9356374aSAndroid Build Coastguard Worker // the first occurrence of itself in a given `absl::string_view`.
67*9356374aSAndroid Build Coastguard Worker //
68*9356374aSAndroid Build Coastguard Worker // The following `Delimiter` types are available for use within `StrSplit()`:
69*9356374aSAndroid Build Coastguard Worker //
70*9356374aSAndroid Build Coastguard Worker //   - `ByString` (default for string arguments)
71*9356374aSAndroid Build Coastguard Worker //   - `ByChar` (default for a char argument)
72*9356374aSAndroid Build Coastguard Worker //   - `ByAnyChar`
73*9356374aSAndroid Build Coastguard Worker //   - `ByLength`
74*9356374aSAndroid Build Coastguard Worker //   - `MaxSplits`
75*9356374aSAndroid Build Coastguard Worker //
76*9356374aSAndroid Build Coastguard Worker // A Delimiter's `Find()` member function will be passed an input `text` that is
77*9356374aSAndroid Build Coastguard Worker // to be split and a position (`pos`) to begin searching for the next delimiter
78*9356374aSAndroid Build Coastguard Worker // in `text`. The returned absl::string_view should refer to the next occurrence
79*9356374aSAndroid Build Coastguard Worker // (after `pos`) of the represented delimiter; this returned absl::string_view
80*9356374aSAndroid Build Coastguard Worker // represents the next location where the input `text` should be broken.
81*9356374aSAndroid Build Coastguard Worker //
82*9356374aSAndroid Build Coastguard Worker // The returned absl::string_view may be zero-length if the Delimiter does not
83*9356374aSAndroid Build Coastguard Worker // represent a part of the string (e.g., a fixed-length delimiter). If no
84*9356374aSAndroid Build Coastguard Worker // delimiter is found in the input `text`, a zero-length absl::string_view
85*9356374aSAndroid Build Coastguard Worker // referring to `text.end()` should be returned (e.g.,
86*9356374aSAndroid Build Coastguard Worker // `text.substr(text.size())`). It is important that the returned
87*9356374aSAndroid Build Coastguard Worker // absl::string_view always be within the bounds of the input `text` given as an
88*9356374aSAndroid Build Coastguard Worker // argument--it must not refer to a string that is physically located outside of
89*9356374aSAndroid Build Coastguard Worker // the given string.
90*9356374aSAndroid Build Coastguard Worker //
91*9356374aSAndroid Build Coastguard Worker // The following example is a simple Delimiter object that is created with a
92*9356374aSAndroid Build Coastguard Worker // single char and will look for that char in the text passed to the `Find()`
93*9356374aSAndroid Build Coastguard Worker // function:
94*9356374aSAndroid Build Coastguard Worker //
95*9356374aSAndroid Build Coastguard Worker //   struct SimpleDelimiter {
96*9356374aSAndroid Build Coastguard Worker //     const char c_;
97*9356374aSAndroid Build Coastguard Worker //     explicit SimpleDelimiter(char c) : c_(c) {}
98*9356374aSAndroid Build Coastguard Worker //     absl::string_view Find(absl::string_view text, size_t pos) {
99*9356374aSAndroid Build Coastguard Worker //       auto found = text.find(c_, pos);
100*9356374aSAndroid Build Coastguard Worker //       if (found == absl::string_view::npos)
101*9356374aSAndroid Build Coastguard Worker //         return text.substr(text.size());
102*9356374aSAndroid Build Coastguard Worker //
103*9356374aSAndroid Build Coastguard Worker //       return text.substr(found, 1);
104*9356374aSAndroid Build Coastguard Worker //     }
105*9356374aSAndroid Build Coastguard Worker //   };
106*9356374aSAndroid Build Coastguard Worker 
107*9356374aSAndroid Build Coastguard Worker // ByString
108*9356374aSAndroid Build Coastguard Worker //
109*9356374aSAndroid Build Coastguard Worker // A sub-string delimiter. If `StrSplit()` is passed a string in place of a
110*9356374aSAndroid Build Coastguard Worker // `Delimiter` object, the string will be implicitly converted into a
111*9356374aSAndroid Build Coastguard Worker // `ByString` delimiter.
112*9356374aSAndroid Build Coastguard Worker //
113*9356374aSAndroid Build Coastguard Worker // Example:
114*9356374aSAndroid Build Coastguard Worker //
115*9356374aSAndroid Build Coastguard Worker //   // Because a string literal is converted to an `absl::ByString`,
116*9356374aSAndroid Build Coastguard Worker //   // the following two splits are equivalent.
117*9356374aSAndroid Build Coastguard Worker //
118*9356374aSAndroid Build Coastguard Worker //   std::vector<std::string> v1 = absl::StrSplit("a, b, c", ", ");
119*9356374aSAndroid Build Coastguard Worker //
120*9356374aSAndroid Build Coastguard Worker //   using absl::ByString;
121*9356374aSAndroid Build Coastguard Worker //   std::vector<std::string> v2 = absl::StrSplit("a, b, c",
122*9356374aSAndroid Build Coastguard Worker //                                                ByString(", "));
//   // v1[0] == "a", v1[1] == "b", v1[2] == "c" (and likewise for v2)
124*9356374aSAndroid Build Coastguard Worker class ByString {
125*9356374aSAndroid Build Coastguard Worker  public:
126*9356374aSAndroid Build Coastguard Worker   explicit ByString(absl::string_view sp);
127*9356374aSAndroid Build Coastguard Worker   absl::string_view Find(absl::string_view text, size_t pos) const;
128*9356374aSAndroid Build Coastguard Worker 
129*9356374aSAndroid Build Coastguard Worker  private:
130*9356374aSAndroid Build Coastguard Worker   const std::string delimiter_;
131*9356374aSAndroid Build Coastguard Worker };
132*9356374aSAndroid Build Coastguard Worker 
133*9356374aSAndroid Build Coastguard Worker // ByAsciiWhitespace
134*9356374aSAndroid Build Coastguard Worker //
135*9356374aSAndroid Build Coastguard Worker // A sub-string delimiter that splits by ASCII whitespace
136*9356374aSAndroid Build Coastguard Worker // (space, tab, vertical tab, formfeed, linefeed, or carriage return).
137*9356374aSAndroid Build Coastguard Worker // Note: you probably want to use absl::SkipEmpty() as well!
138*9356374aSAndroid Build Coastguard Worker //
139*9356374aSAndroid Build Coastguard Worker // This class is equivalent to ByAnyChar with ASCII whitespace chars.
140*9356374aSAndroid Build Coastguard Worker //
141*9356374aSAndroid Build Coastguard Worker // Example:
142*9356374aSAndroid Build Coastguard Worker //
143*9356374aSAndroid Build Coastguard Worker //   std::vector<std::string> v = absl::StrSplit(
144*9356374aSAndroid Build Coastguard Worker //       "a b\tc\n  d  \n", absl::ByAsciiWhitespace(), absl::SkipEmpty());
145*9356374aSAndroid Build Coastguard Worker //   // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d"
146*9356374aSAndroid Build Coastguard Worker class ByAsciiWhitespace {
147*9356374aSAndroid Build Coastguard Worker  public:
148*9356374aSAndroid Build Coastguard Worker   absl::string_view Find(absl::string_view text, size_t pos) const;
149*9356374aSAndroid Build Coastguard Worker };
150*9356374aSAndroid Build Coastguard Worker 
151*9356374aSAndroid Build Coastguard Worker // ByChar
152*9356374aSAndroid Build Coastguard Worker //
153*9356374aSAndroid Build Coastguard Worker // A single character delimiter. `ByChar` is functionally equivalent to a
154*9356374aSAndroid Build Coastguard Worker // 1-char string within a `ByString` delimiter, but slightly more efficient.
155*9356374aSAndroid Build Coastguard Worker //
156*9356374aSAndroid Build Coastguard Worker // Example:
157*9356374aSAndroid Build Coastguard Worker //
//   // Because a char literal is converted to an `absl::ByChar`,
159*9356374aSAndroid Build Coastguard Worker //   // the following two splits are equivalent.
160*9356374aSAndroid Build Coastguard Worker //   std::vector<std::string> v1 = absl::StrSplit("a,b,c", ',');
161*9356374aSAndroid Build Coastguard Worker //   using absl::ByChar;
162*9356374aSAndroid Build Coastguard Worker //   std::vector<std::string> v2 = absl::StrSplit("a,b,c", ByChar(','));
//   // v1[0] == "a", v1[1] == "b", v1[2] == "c" (and likewise for v2)
164*9356374aSAndroid Build Coastguard Worker //
165*9356374aSAndroid Build Coastguard Worker // `ByChar` is also the default delimiter if a single character is given
166*9356374aSAndroid Build Coastguard Worker // as the delimiter to `StrSplit()`. For example, the following calls are
167*9356374aSAndroid Build Coastguard Worker // equivalent:
168*9356374aSAndroid Build Coastguard Worker //
169*9356374aSAndroid Build Coastguard Worker //   std::vector<std::string> v = absl::StrSplit("a-b", '-');
170*9356374aSAndroid Build Coastguard Worker //
171*9356374aSAndroid Build Coastguard Worker //   using absl::ByChar;
172*9356374aSAndroid Build Coastguard Worker //   std::vector<std::string> v = absl::StrSplit("a-b", ByChar('-'));
173*9356374aSAndroid Build Coastguard Worker //
174*9356374aSAndroid Build Coastguard Worker class ByChar {
175*9356374aSAndroid Build Coastguard Worker  public:
ByChar(char c)176*9356374aSAndroid Build Coastguard Worker   explicit ByChar(char c) : c_(c) {}
177*9356374aSAndroid Build Coastguard Worker   absl::string_view Find(absl::string_view text, size_t pos) const;
178*9356374aSAndroid Build Coastguard Worker 
179*9356374aSAndroid Build Coastguard Worker  private:
180*9356374aSAndroid Build Coastguard Worker   char c_;
181*9356374aSAndroid Build Coastguard Worker };
182*9356374aSAndroid Build Coastguard Worker 
183*9356374aSAndroid Build Coastguard Worker // ByAnyChar
184*9356374aSAndroid Build Coastguard Worker //
185*9356374aSAndroid Build Coastguard Worker // A delimiter that will match any of the given byte-sized characters within
186*9356374aSAndroid Build Coastguard Worker // its provided string.
187*9356374aSAndroid Build Coastguard Worker //
188*9356374aSAndroid Build Coastguard Worker // Note: this delimiter works with single-byte string data, but does not work
189*9356374aSAndroid Build Coastguard Worker // with variable-width encodings, such as UTF-8.
190*9356374aSAndroid Build Coastguard Worker //
191*9356374aSAndroid Build Coastguard Worker // Example:
192*9356374aSAndroid Build Coastguard Worker //
193*9356374aSAndroid Build Coastguard Worker //   using absl::ByAnyChar;
194*9356374aSAndroid Build Coastguard Worker //   std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
195*9356374aSAndroid Build Coastguard Worker //   // v[0] == "a", v[1] == "b", v[2] == "c"
196*9356374aSAndroid Build Coastguard Worker //
197*9356374aSAndroid Build Coastguard Worker // If `ByAnyChar` is given the empty string, it behaves exactly like
198*9356374aSAndroid Build Coastguard Worker // `ByString` and matches each individual character in the input string.
199*9356374aSAndroid Build Coastguard Worker //
200*9356374aSAndroid Build Coastguard Worker class ByAnyChar {
201*9356374aSAndroid Build Coastguard Worker  public:
202*9356374aSAndroid Build Coastguard Worker   explicit ByAnyChar(absl::string_view sp);
203*9356374aSAndroid Build Coastguard Worker   absl::string_view Find(absl::string_view text, size_t pos) const;
204*9356374aSAndroid Build Coastguard Worker 
205*9356374aSAndroid Build Coastguard Worker  private:
206*9356374aSAndroid Build Coastguard Worker   const std::string delimiters_;
207*9356374aSAndroid Build Coastguard Worker };
208*9356374aSAndroid Build Coastguard Worker 
209*9356374aSAndroid Build Coastguard Worker // ByLength
210*9356374aSAndroid Build Coastguard Worker //
211*9356374aSAndroid Build Coastguard Worker // A delimiter for splitting into equal-length strings. The length argument to
212*9356374aSAndroid Build Coastguard Worker // the constructor must be greater than 0.
213*9356374aSAndroid Build Coastguard Worker //
214*9356374aSAndroid Build Coastguard Worker // Note: this delimiter works with single-byte string data, but does not work
215*9356374aSAndroid Build Coastguard Worker // with variable-width encodings, such as UTF-8.
216*9356374aSAndroid Build Coastguard Worker //
217*9356374aSAndroid Build Coastguard Worker // Example:
218*9356374aSAndroid Build Coastguard Worker //
219*9356374aSAndroid Build Coastguard Worker //   using absl::ByLength;
220*9356374aSAndroid Build Coastguard Worker //   std::vector<std::string> v = absl::StrSplit("123456789", ByLength(3));
//
222*9356374aSAndroid Build Coastguard Worker //   // v[0] == "123", v[1] == "456", v[2] == "789"
223*9356374aSAndroid Build Coastguard Worker //
224*9356374aSAndroid Build Coastguard Worker // Note that the string does not have to be a multiple of the fixed split
225*9356374aSAndroid Build Coastguard Worker // length. In such a case, the last substring will be shorter.
226*9356374aSAndroid Build Coastguard Worker //
227*9356374aSAndroid Build Coastguard Worker //   using absl::ByLength;
228*9356374aSAndroid Build Coastguard Worker //   std::vector<std::string> v = absl::StrSplit("12345", ByLength(2));
229*9356374aSAndroid Build Coastguard Worker //
230*9356374aSAndroid Build Coastguard Worker //   // v[0] == "12", v[1] == "34", v[2] == "5"
231*9356374aSAndroid Build Coastguard Worker class ByLength {
232*9356374aSAndroid Build Coastguard Worker  public:
233*9356374aSAndroid Build Coastguard Worker   explicit ByLength(ptrdiff_t length);
234*9356374aSAndroid Build Coastguard Worker   absl::string_view Find(absl::string_view text, size_t pos) const;
235*9356374aSAndroid Build Coastguard Worker 
236*9356374aSAndroid Build Coastguard Worker  private:
237*9356374aSAndroid Build Coastguard Worker   const ptrdiff_t length_;
238*9356374aSAndroid Build Coastguard Worker };
239*9356374aSAndroid Build Coastguard Worker 
240*9356374aSAndroid Build Coastguard Worker namespace strings_internal {
241*9356374aSAndroid Build Coastguard Worker 
242*9356374aSAndroid Build Coastguard Worker // A traits-like metafunction for selecting the default Delimiter object type
243*9356374aSAndroid Build Coastguard Worker // for a particular Delimiter type. The base case simply exposes type Delimiter
// itself as the delimiter's `type`. However, there are specializations that
// map `char` to the ByChar delimiter object and string-like objects to the
// ByString delimiter object. This allows functions like absl::StrSplit() and
// absl::MaxSplits() to accept a char (e.g., ',') or a string-like object
// (e.g., ", ") as a delimiter argument; it will be treated as if a ByChar or
// ByString delimiter, respectively, was given.
// Primary template: a type with no dedicated specialization is used as the
// delimiter type unchanged.
template <typename D>
struct SelectDelimiter {
  using type = D;
};
253*9356374aSAndroid Build Coastguard Worker 
254*9356374aSAndroid Build Coastguard Worker template <>
255*9356374aSAndroid Build Coastguard Worker struct SelectDelimiter<char> {
256*9356374aSAndroid Build Coastguard Worker   using type = ByChar;
257*9356374aSAndroid Build Coastguard Worker };
258*9356374aSAndroid Build Coastguard Worker template <>
259*9356374aSAndroid Build Coastguard Worker struct SelectDelimiter<char*> {
260*9356374aSAndroid Build Coastguard Worker   using type = ByString;
261*9356374aSAndroid Build Coastguard Worker };
262*9356374aSAndroid Build Coastguard Worker template <>
263*9356374aSAndroid Build Coastguard Worker struct SelectDelimiter<const char*> {
264*9356374aSAndroid Build Coastguard Worker   using type = ByString;
265*9356374aSAndroid Build Coastguard Worker };
266*9356374aSAndroid Build Coastguard Worker template <>
267*9356374aSAndroid Build Coastguard Worker struct SelectDelimiter<absl::string_view> {
268*9356374aSAndroid Build Coastguard Worker   using type = ByString;
269*9356374aSAndroid Build Coastguard Worker };
270*9356374aSAndroid Build Coastguard Worker template <>
271*9356374aSAndroid Build Coastguard Worker struct SelectDelimiter<std::string> {
272*9356374aSAndroid Build Coastguard Worker   using type = ByString;
273*9356374aSAndroid Build Coastguard Worker };
274*9356374aSAndroid Build Coastguard Worker 
275*9356374aSAndroid Build Coastguard Worker // Wraps another delimiter and sets a max number of matches for that delimiter.
276*9356374aSAndroid Build Coastguard Worker template <typename Delimiter>
277*9356374aSAndroid Build Coastguard Worker class MaxSplitsImpl {
278*9356374aSAndroid Build Coastguard Worker  public:
279*9356374aSAndroid Build Coastguard Worker   MaxSplitsImpl(Delimiter delimiter, int limit)
280*9356374aSAndroid Build Coastguard Worker       : delimiter_(delimiter), limit_(limit), count_(0) {}
281*9356374aSAndroid Build Coastguard Worker   absl::string_view Find(absl::string_view text, size_t pos) {
282*9356374aSAndroid Build Coastguard Worker     if (count_++ == limit_) {
283*9356374aSAndroid Build Coastguard Worker       return absl::string_view(text.data() + text.size(),
284*9356374aSAndroid Build Coastguard Worker                                0);  // No more matches.
285*9356374aSAndroid Build Coastguard Worker     }
286*9356374aSAndroid Build Coastguard Worker     return delimiter_.Find(text, pos);
287*9356374aSAndroid Build Coastguard Worker   }
288*9356374aSAndroid Build Coastguard Worker 
289*9356374aSAndroid Build Coastguard Worker  private:
290*9356374aSAndroid Build Coastguard Worker   Delimiter delimiter_;
291*9356374aSAndroid Build Coastguard Worker   const int limit_;
292*9356374aSAndroid Build Coastguard Worker   int count_;
293*9356374aSAndroid Build Coastguard Worker };
294*9356374aSAndroid Build Coastguard Worker 
295*9356374aSAndroid Build Coastguard Worker }  // namespace strings_internal
296*9356374aSAndroid Build Coastguard Worker 
297*9356374aSAndroid Build Coastguard Worker // MaxSplits()
298*9356374aSAndroid Build Coastguard Worker //
299*9356374aSAndroid Build Coastguard Worker // A delimiter that limits the number of matches which can occur to the passed
300*9356374aSAndroid Build Coastguard Worker // `limit`. The last element in the returned collection will contain all
301*9356374aSAndroid Build Coastguard Worker // remaining unsplit pieces, which may contain instances of the delimiter.
302*9356374aSAndroid Build Coastguard Worker // The collection will contain at most `limit` + 1 elements.
303*9356374aSAndroid Build Coastguard Worker // Example:
304*9356374aSAndroid Build Coastguard Worker //
305*9356374aSAndroid Build Coastguard Worker //   using absl::MaxSplits;
306*9356374aSAndroid Build Coastguard Worker //   std::vector<std::string> v = absl::StrSplit("a,b,c", MaxSplits(',', 1));
307*9356374aSAndroid Build Coastguard Worker //
308*9356374aSAndroid Build Coastguard Worker //   // v[0] == "a", v[1] == "b,c"
309*9356374aSAndroid Build Coastguard Worker template <typename Delimiter>
310*9356374aSAndroid Build Coastguard Worker inline strings_internal::MaxSplitsImpl<
311*9356374aSAndroid Build Coastguard Worker     typename strings_internal::SelectDelimiter<Delimiter>::type>
312*9356374aSAndroid Build Coastguard Worker MaxSplits(Delimiter delimiter, int limit) {
313*9356374aSAndroid Build Coastguard Worker   typedef
314*9356374aSAndroid Build Coastguard Worker       typename strings_internal::SelectDelimiter<Delimiter>::type DelimiterType;
315*9356374aSAndroid Build Coastguard Worker   return strings_internal::MaxSplitsImpl<DelimiterType>(
316*9356374aSAndroid Build Coastguard Worker       DelimiterType(delimiter), limit);
317*9356374aSAndroid Build Coastguard Worker }
318*9356374aSAndroid Build Coastguard Worker 
319*9356374aSAndroid Build Coastguard Worker //------------------------------------------------------------------------------
320*9356374aSAndroid Build Coastguard Worker // Predicates
321*9356374aSAndroid Build Coastguard Worker //------------------------------------------------------------------------------
322*9356374aSAndroid Build Coastguard Worker //
323*9356374aSAndroid Build Coastguard Worker // Predicates filter the results of a `StrSplit()` by determining whether or not
324*9356374aSAndroid Build Coastguard Worker // a resultant element is included in the result set. A predicate may be passed
325*9356374aSAndroid Build Coastguard Worker // as an optional third argument to the `StrSplit()` function.
326*9356374aSAndroid Build Coastguard Worker //
327*9356374aSAndroid Build Coastguard Worker // Predicates are unary functions (or functors) that take a single
328*9356374aSAndroid Build Coastguard Worker // `absl::string_view` argument and return a bool indicating whether the
329*9356374aSAndroid Build Coastguard Worker // argument should be included (`true`) or excluded (`false`).
330*9356374aSAndroid Build Coastguard Worker //
331*9356374aSAndroid Build Coastguard Worker // Predicates are useful when filtering out empty substrings. By default, empty
332*9356374aSAndroid Build Coastguard Worker // substrings may be returned by `StrSplit()`, which is similar to the way split
333*9356374aSAndroid Build Coastguard Worker // functions work in other programming languages.
334*9356374aSAndroid Build Coastguard Worker 
335*9356374aSAndroid Build Coastguard Worker // AllowEmpty()
336*9356374aSAndroid Build Coastguard Worker //
337*9356374aSAndroid Build Coastguard Worker // Always returns `true`, indicating that all strings--including empty
338*9356374aSAndroid Build Coastguard Worker // strings--should be included in the split output. This predicate is not
339*9356374aSAndroid Build Coastguard Worker // strictly needed because this is the default behavior of `StrSplit()`;
340*9356374aSAndroid Build Coastguard Worker // however, it might be useful at some call sites to make the intent explicit.
341*9356374aSAndroid Build Coastguard Worker //
342*9356374aSAndroid Build Coastguard Worker // Example:
343*9356374aSAndroid Build Coastguard Worker //
344*9356374aSAndroid Build Coastguard Worker //  std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', AllowEmpty());
345*9356374aSAndroid Build Coastguard Worker //
//  // v[0] == " a ", v[1] == " ", v[2] == "", v[3] == "b", v[4] == ""
347*9356374aSAndroid Build Coastguard Worker struct AllowEmpty {
348*9356374aSAndroid Build Coastguard Worker   bool operator()(absl::string_view) const { return true; }
349*9356374aSAndroid Build Coastguard Worker };
350*9356374aSAndroid Build Coastguard Worker 
351*9356374aSAndroid Build Coastguard Worker // SkipEmpty()
352*9356374aSAndroid Build Coastguard Worker //
353*9356374aSAndroid Build Coastguard Worker // Returns `false` if the given `absl::string_view` is empty, indicating that
354*9356374aSAndroid Build Coastguard Worker // `StrSplit()` should omit the empty string.
355*9356374aSAndroid Build Coastguard Worker //
356*9356374aSAndroid Build Coastguard Worker // Example:
357*9356374aSAndroid Build Coastguard Worker //
358*9356374aSAndroid Build Coastguard Worker //   std::vector<std::string> v = absl::StrSplit(",a,,b,", ',', SkipEmpty());
359*9356374aSAndroid Build Coastguard Worker //
360*9356374aSAndroid Build Coastguard Worker //   // v[0] == "a", v[1] == "b"
361*9356374aSAndroid Build Coastguard Worker //
362*9356374aSAndroid Build Coastguard Worker // Note: `SkipEmpty()` does not consider a string containing only whitespace
363*9356374aSAndroid Build Coastguard Worker // to be empty. To skip such whitespace as well, use the `SkipWhitespace()`
364*9356374aSAndroid Build Coastguard Worker // predicate.
365*9356374aSAndroid Build Coastguard Worker struct SkipEmpty {
366*9356374aSAndroid Build Coastguard Worker   bool operator()(absl::string_view sp) const { return !sp.empty(); }
367*9356374aSAndroid Build Coastguard Worker };
368*9356374aSAndroid Build Coastguard Worker 
369*9356374aSAndroid Build Coastguard Worker // SkipWhitespace()
370*9356374aSAndroid Build Coastguard Worker //
371*9356374aSAndroid Build Coastguard Worker // Returns `false` if the given `absl::string_view` is empty *or* contains only
372*9356374aSAndroid Build Coastguard Worker // whitespace, indicating that `StrSplit()` should omit the string.
373*9356374aSAndroid Build Coastguard Worker //
374*9356374aSAndroid Build Coastguard Worker // Example:
375*9356374aSAndroid Build Coastguard Worker //
376*9356374aSAndroid Build Coastguard Worker //   std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
377*9356374aSAndroid Build Coastguard Worker //                                               ',', SkipWhitespace());
378*9356374aSAndroid Build Coastguard Worker //   // v[0] == " a ", v[1] == "b"
379*9356374aSAndroid Build Coastguard Worker //
380*9356374aSAndroid Build Coastguard Worker //   // SkipEmpty() would return whitespace elements
381*9356374aSAndroid Build Coastguard Worker //   std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', SkipEmpty());
382*9356374aSAndroid Build Coastguard Worker //   // v[0] == " a ", v[1] == " ", v[2] == "b"
383*9356374aSAndroid Build Coastguard Worker struct SkipWhitespace {
384*9356374aSAndroid Build Coastguard Worker   bool operator()(absl::string_view sp) const {
385*9356374aSAndroid Build Coastguard Worker     sp = absl::StripAsciiWhitespace(sp);
386*9356374aSAndroid Build Coastguard Worker     return !sp.empty();
387*9356374aSAndroid Build Coastguard Worker   }
388*9356374aSAndroid Build Coastguard Worker };
389*9356374aSAndroid Build Coastguard Worker 
// SFINAE helper: names `int` when `T` is `std::string` or `const std::string`
// and has no `type` (so the substitution fails) for every other `T`,
// including volatile-qualified and reference types.
template <typename T>
using EnableSplitIfString = typename std::enable_if<
    std::is_same<typename std::remove_const<T>::type, std::string>::value,
    int>::type;
395*9356374aSAndroid Build Coastguard Worker 
396*9356374aSAndroid Build Coastguard Worker //------------------------------------------------------------------------------
397*9356374aSAndroid Build Coastguard Worker //                                  StrSplit()
398*9356374aSAndroid Build Coastguard Worker //------------------------------------------------------------------------------
399*9356374aSAndroid Build Coastguard Worker 
400*9356374aSAndroid Build Coastguard Worker // StrSplit()
401*9356374aSAndroid Build Coastguard Worker //
402*9356374aSAndroid Build Coastguard Worker // Splits a given string based on the provided `Delimiter` object, returning the
403*9356374aSAndroid Build Coastguard Worker // elements within the type specified by the caller. Optionally, you may pass a
404*9356374aSAndroid Build Coastguard Worker // `Predicate` to `StrSplit()` indicating whether to include or exclude the
405*9356374aSAndroid Build Coastguard Worker // resulting element within the final result set. (See the overviews for
406*9356374aSAndroid Build Coastguard Worker // Delimiters and Predicates above.)
407*9356374aSAndroid Build Coastguard Worker //
408*9356374aSAndroid Build Coastguard Worker // Example:
409*9356374aSAndroid Build Coastguard Worker //
410*9356374aSAndroid Build Coastguard Worker //   std::vector<std::string> v = absl::StrSplit("a,b,c,d", ',');
411*9356374aSAndroid Build Coastguard Worker //   // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d"
412*9356374aSAndroid Build Coastguard Worker //
413*9356374aSAndroid Build Coastguard Worker // You can also provide an explicit `Delimiter` object:
414*9356374aSAndroid Build Coastguard Worker //
415*9356374aSAndroid Build Coastguard Worker // Example:
416*9356374aSAndroid Build Coastguard Worker //
417*9356374aSAndroid Build Coastguard Worker //   using absl::ByAnyChar;
418*9356374aSAndroid Build Coastguard Worker //   std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
419*9356374aSAndroid Build Coastguard Worker //   // v[0] == "a", v[1] == "b", v[2] == "c"
420*9356374aSAndroid Build Coastguard Worker //
421*9356374aSAndroid Build Coastguard Worker // See above for more information on delimiters.
422*9356374aSAndroid Build Coastguard Worker //
423*9356374aSAndroid Build Coastguard Worker // By default, empty strings are included in the result set. You can optionally
424*9356374aSAndroid Build Coastguard Worker // include a third `Predicate` argument to apply a test for whether the
425*9356374aSAndroid Build Coastguard Worker // resultant element should be included in the result set:
426*9356374aSAndroid Build Coastguard Worker //
427*9356374aSAndroid Build Coastguard Worker // Example:
428*9356374aSAndroid Build Coastguard Worker //
429*9356374aSAndroid Build Coastguard Worker //   std::vector<std::string> v = absl::StrSplit(" a , ,,b,",
430*9356374aSAndroid Build Coastguard Worker //                                               ',', SkipWhitespace());
431*9356374aSAndroid Build Coastguard Worker //   // v[0] == " a ", v[1] == "b"
432*9356374aSAndroid Build Coastguard Worker //
433*9356374aSAndroid Build Coastguard Worker // See above for more information on predicates.
434*9356374aSAndroid Build Coastguard Worker //
435*9356374aSAndroid Build Coastguard Worker //------------------------------------------------------------------------------
436*9356374aSAndroid Build Coastguard Worker // StrSplit() Return Types
437*9356374aSAndroid Build Coastguard Worker //------------------------------------------------------------------------------
438*9356374aSAndroid Build Coastguard Worker //
439*9356374aSAndroid Build Coastguard Worker // The `StrSplit()` function adapts the returned collection to the collection
440*9356374aSAndroid Build Coastguard Worker // specified by the caller (e.g. `std::vector` above). The returned collections
441*9356374aSAndroid Build Coastguard Worker // may contain `std::string`, `absl::string_view` (in which case the original
442*9356374aSAndroid Build Coastguard Worker // string being split must ensure that it outlives the collection), or any
443*9356374aSAndroid Build Coastguard Worker // object that can be explicitly created from an `absl::string_view`. This
444*9356374aSAndroid Build Coastguard Worker // behavior works for:
445*9356374aSAndroid Build Coastguard Worker //
// 1) All standard STL containers including `std::vector`, `std::list`,
//    `std::deque`, `std::set`, `std::multiset`, `std::map`, and `std::multimap`
448*9356374aSAndroid Build Coastguard Worker // 2) `std::pair` (which is not actually a container). See below.
449*9356374aSAndroid Build Coastguard Worker //
450*9356374aSAndroid Build Coastguard Worker // Example:
451*9356374aSAndroid Build Coastguard Worker //
452*9356374aSAndroid Build Coastguard Worker //   // The results are returned as `absl::string_view` objects. Note that we
453*9356374aSAndroid Build Coastguard Worker //   // have to ensure that the input string outlives any results.
454*9356374aSAndroid Build Coastguard Worker //   std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
455*9356374aSAndroid Build Coastguard Worker //
456*9356374aSAndroid Build Coastguard Worker //   // Stores results in a std::set<std::string>, which also performs
457*9356374aSAndroid Build Coastguard Worker //   // de-duplication and orders the elements in ascending order.
458*9356374aSAndroid Build Coastguard Worker //   std::set<std::string> a = absl::StrSplit("b,a,c,a,b", ',');
//   // a contains "a", "b", "c" (iterated in that order)
460*9356374aSAndroid Build Coastguard Worker //
461*9356374aSAndroid Build Coastguard Worker //   // `StrSplit()` can be used within a range-based for loop, in which case
462*9356374aSAndroid Build Coastguard Worker //   // each element will be of type `absl::string_view`.
463*9356374aSAndroid Build Coastguard Worker //   std::vector<std::string> v;
464*9356374aSAndroid Build Coastguard Worker //   for (const auto sv : absl::StrSplit("a,b,c", ',')) {
465*9356374aSAndroid Build Coastguard Worker //     if (sv != "b") v.emplace_back(sv);
466*9356374aSAndroid Build Coastguard Worker //   }
467*9356374aSAndroid Build Coastguard Worker //   // v[0] == "a", v[1] == "c"
468*9356374aSAndroid Build Coastguard Worker //
469*9356374aSAndroid Build Coastguard Worker //   // Stores results in a map. The map implementation assumes that the input
470*9356374aSAndroid Build Coastguard Worker //   // is provided as a series of key/value pairs. For example, the 0th element
471*9356374aSAndroid Build Coastguard Worker //   // resulting from the split will be stored as a key to the 1st element. If
472*9356374aSAndroid Build Coastguard Worker //   // an odd number of elements are resolved, the last element is paired with
473*9356374aSAndroid Build Coastguard Worker //   // a default-constructed value (e.g., empty string).
474*9356374aSAndroid Build Coastguard Worker //   std::map<std::string, std::string> m = absl::StrSplit("a,b,c", ',');
475*9356374aSAndroid Build Coastguard Worker //   // m["a"] == "b", m["c"] == ""     // last component value equals ""
476*9356374aSAndroid Build Coastguard Worker //
477*9356374aSAndroid Build Coastguard Worker // Splitting to `std::pair` is an interesting case because it can hold only two
478*9356374aSAndroid Build Coastguard Worker // elements and is not a collection type. When splitting to a `std::pair` the
479*9356374aSAndroid Build Coastguard Worker // first two split strings become the `std::pair` `.first` and `.second`
480*9356374aSAndroid Build Coastguard Worker // members, respectively. The remaining split substrings are discarded. If there
// are fewer than two split substrings, the empty string is used for the
482*9356374aSAndroid Build Coastguard Worker // corresponding `std::pair` member.
483*9356374aSAndroid Build Coastguard Worker //
484*9356374aSAndroid Build Coastguard Worker // Example:
485*9356374aSAndroid Build Coastguard Worker //
486*9356374aSAndroid Build Coastguard Worker //   // Stores first two split strings as the members in a std::pair.
487*9356374aSAndroid Build Coastguard Worker //   std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
488*9356374aSAndroid Build Coastguard Worker //   // p.first == "a", p.second == "b"       // "c" is omitted.
489*9356374aSAndroid Build Coastguard Worker //
490*9356374aSAndroid Build Coastguard Worker // The `StrSplit()` function can be used multiple times to perform more
491*9356374aSAndroid Build Coastguard Worker // complicated splitting logic, such as intelligently parsing key-value pairs.
492*9356374aSAndroid Build Coastguard Worker //
493*9356374aSAndroid Build Coastguard Worker // Example:
494*9356374aSAndroid Build Coastguard Worker //
495*9356374aSAndroid Build Coastguard Worker //   // The input string "a=b=c,d=e,f=,g" becomes
496*9356374aSAndroid Build Coastguard Worker //   // { "a" => "b=c", "d" => "e", "f" => "", "g" => "" }
497*9356374aSAndroid Build Coastguard Worker //   std::map<std::string, std::string> m;
498*9356374aSAndroid Build Coastguard Worker //   for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) {
499*9356374aSAndroid Build Coastguard Worker //     m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1)));
500*9356374aSAndroid Build Coastguard Worker //   }
501*9356374aSAndroid Build Coastguard Worker //   EXPECT_EQ("b=c", m.find("a")->second);
502*9356374aSAndroid Build Coastguard Worker //   EXPECT_EQ("e", m.find("d")->second);
503*9356374aSAndroid Build Coastguard Worker //   EXPECT_EQ("", m.find("f")->second);
504*9356374aSAndroid Build Coastguard Worker //   EXPECT_EQ("", m.find("g")->second);
505*9356374aSAndroid Build Coastguard Worker //
506*9356374aSAndroid Build Coastguard Worker // WARNING: Due to a legacy bug that is maintained for backward compatibility,
507*9356374aSAndroid Build Coastguard Worker // splitting the following empty string_views produces different results:
508*9356374aSAndroid Build Coastguard Worker //
509*9356374aSAndroid Build Coastguard Worker //   absl::StrSplit(absl::string_view(""), '-');  // {""}
510*9356374aSAndroid Build Coastguard Worker //   absl::StrSplit(absl::string_view(), '-');    // {}, but should be {""}
511*9356374aSAndroid Build Coastguard Worker //
512*9356374aSAndroid Build Coastguard Worker // Try not to depend on this distinction because the bug may one day be fixed.
513*9356374aSAndroid Build Coastguard Worker template <typename Delimiter>
514*9356374aSAndroid Build Coastguard Worker strings_internal::Splitter<
515*9356374aSAndroid Build Coastguard Worker     typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty,
516*9356374aSAndroid Build Coastguard Worker     absl::string_view>
517*9356374aSAndroid Build Coastguard Worker StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d) {
518*9356374aSAndroid Build Coastguard Worker   using DelimiterType =
519*9356374aSAndroid Build Coastguard Worker       typename strings_internal::SelectDelimiter<Delimiter>::type;
520*9356374aSAndroid Build Coastguard Worker   return strings_internal::Splitter<DelimiterType, AllowEmpty,
521*9356374aSAndroid Build Coastguard Worker                                     absl::string_view>(
522*9356374aSAndroid Build Coastguard Worker       text.value(), DelimiterType(d), AllowEmpty());
523*9356374aSAndroid Build Coastguard Worker }
524*9356374aSAndroid Build Coastguard Worker 
525*9356374aSAndroid Build Coastguard Worker template <typename Delimiter, typename StringType,
526*9356374aSAndroid Build Coastguard Worker           EnableSplitIfString<StringType> = 0>
527*9356374aSAndroid Build Coastguard Worker strings_internal::Splitter<
528*9356374aSAndroid Build Coastguard Worker     typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty,
529*9356374aSAndroid Build Coastguard Worker     std::string>
530*9356374aSAndroid Build Coastguard Worker StrSplit(StringType&& text, Delimiter d) {
531*9356374aSAndroid Build Coastguard Worker   using DelimiterType =
532*9356374aSAndroid Build Coastguard Worker       typename strings_internal::SelectDelimiter<Delimiter>::type;
533*9356374aSAndroid Build Coastguard Worker   return strings_internal::Splitter<DelimiterType, AllowEmpty, std::string>(
534*9356374aSAndroid Build Coastguard Worker       std::move(text), DelimiterType(d), AllowEmpty());
535*9356374aSAndroid Build Coastguard Worker }
536*9356374aSAndroid Build Coastguard Worker 
537*9356374aSAndroid Build Coastguard Worker template <typename Delimiter, typename Predicate>
538*9356374aSAndroid Build Coastguard Worker strings_internal::Splitter<
539*9356374aSAndroid Build Coastguard Worker     typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate,
540*9356374aSAndroid Build Coastguard Worker     absl::string_view>
541*9356374aSAndroid Build Coastguard Worker StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d,
542*9356374aSAndroid Build Coastguard Worker          Predicate p) {
543*9356374aSAndroid Build Coastguard Worker   using DelimiterType =
544*9356374aSAndroid Build Coastguard Worker       typename strings_internal::SelectDelimiter<Delimiter>::type;
545*9356374aSAndroid Build Coastguard Worker   return strings_internal::Splitter<DelimiterType, Predicate,
546*9356374aSAndroid Build Coastguard Worker                                     absl::string_view>(
547*9356374aSAndroid Build Coastguard Worker       text.value(), DelimiterType(std::move(d)), std::move(p));
548*9356374aSAndroid Build Coastguard Worker }
549*9356374aSAndroid Build Coastguard Worker 
550*9356374aSAndroid Build Coastguard Worker template <typename Delimiter, typename Predicate, typename StringType,
551*9356374aSAndroid Build Coastguard Worker           EnableSplitIfString<StringType> = 0>
552*9356374aSAndroid Build Coastguard Worker strings_internal::Splitter<
553*9356374aSAndroid Build Coastguard Worker     typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate,
554*9356374aSAndroid Build Coastguard Worker     std::string>
555*9356374aSAndroid Build Coastguard Worker StrSplit(StringType&& text, Delimiter d, Predicate p) {
556*9356374aSAndroid Build Coastguard Worker   using DelimiterType =
557*9356374aSAndroid Build Coastguard Worker       typename strings_internal::SelectDelimiter<Delimiter>::type;
558*9356374aSAndroid Build Coastguard Worker   return strings_internal::Splitter<DelimiterType, Predicate, std::string>(
559*9356374aSAndroid Build Coastguard Worker       std::move(text), DelimiterType(d), std::move(p));
560*9356374aSAndroid Build Coastguard Worker }
561*9356374aSAndroid Build Coastguard Worker 
562*9356374aSAndroid Build Coastguard Worker ABSL_NAMESPACE_END
563*9356374aSAndroid Build Coastguard Worker }  // namespace absl
564*9356374aSAndroid Build Coastguard Worker 
565*9356374aSAndroid Build Coastguard Worker #endif  // ABSL_STRINGS_STR_SPLIT_H_
566