//
// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// -----------------------------------------------------------------------------
// File: str_split.h
// -----------------------------------------------------------------------------
//
// This file contains functions for splitting strings. It defines the main
// `StrSplit()` function, several delimiters for determining the boundaries on
// which to split the string, and predicates for filtering delimited results.
// `StrSplit()` adapts the returned collection to the type specified by the
// caller.
//
// Example:
//
//   // Splits the given string on commas. Returns the results in a
//   // vector of strings.
//   std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
//   // Can also use ","
//   // v[0] == "a", v[1] == "b", v[2] == "c"
//
// See StrSplit() below for more information.
#ifndef ABSL_STRINGS_STR_SPLIT_H_
#define ABSL_STRINGS_STR_SPLIT_H_

#include <algorithm>
#include <cstddef>
#include <map>
#include <set>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>

#include "absl/base/internal/raw_logging.h"
#include "absl/base/macros.h"
#include "absl/strings/internal/str_split_internal.h"
#include "absl/strings/string_view.h"
#include "absl/strings/strip.h"

namespace absl {
ABSL_NAMESPACE_BEGIN

//------------------------------------------------------------------------------
// Delimiters
//------------------------------------------------------------------------------
//
// `StrSplit()` uses delimiters to define the boundaries between elements in the
// provided input. Several `Delimiter` types are defined below. If a string
// (`const char*`, `std::string`, or `absl::string_view`) is passed in place of
// an explicit `Delimiter` object, `StrSplit()` treats it the same way as if it
// were passed a `ByString` delimiter.
//
// A `Delimiter` is an object with a `Find()` function that knows how to find
// the first occurrence of itself in a given `absl::string_view`.
//
// The following `Delimiter` types are available for use within `StrSplit()`:
//
//   - `ByString` (default for string arguments)
//   - `ByAsciiWhitespace`
//   - `ByChar` (default for a char argument)
//   - `ByAnyChar`
//   - `ByLength`
//   - `MaxSplits`
//
// A Delimiter's `Find()` member function will be passed an input `text` that is
// to be split and a position (`pos`) to begin searching for the next delimiter
// in `text`. The returned absl::string_view should refer to the next occurrence
// (after `pos`) of the represented delimiter; this returned absl::string_view
// represents the next location where the input `text` should be broken.
//
// The returned absl::string_view may be zero-length if the Delimiter does not
// represent a part of the string (e.g., a fixed-length delimiter). If no
// delimiter is found in the input `text`, a zero-length absl::string_view
// referring to `text.end()` should be returned (e.g.,
// `text.substr(text.size())`).
// It is important that the returned
// absl::string_view always be within the bounds of the input `text` given as an
// argument--it must not refer to a string that is physically located outside of
// the given string.
//
// The following example is a simple Delimiter object that is created with a
// single char and will look for that char in the text passed to the `Find()`
// function:
//
//   struct SimpleDelimiter {
//     const char c_;
//     explicit SimpleDelimiter(char c) : c_(c) {}
//     absl::string_view Find(absl::string_view text, size_t pos) {
//       auto found = text.find(c_, pos);
//       if (found == absl::string_view::npos)
//         return text.substr(text.size());
//
//       return text.substr(found, 1);
//     }
//   };

// ByString
//
// A sub-string delimiter.
If `StrSplit()` is passed a string in place of a 110*9356374aSAndroid Build Coastguard Worker // `Delimiter` object, the string will be implicitly converted into a 111*9356374aSAndroid Build Coastguard Worker // `ByString` delimiter. 112*9356374aSAndroid Build Coastguard Worker // 113*9356374aSAndroid Build Coastguard Worker // Example: 114*9356374aSAndroid Build Coastguard Worker // 115*9356374aSAndroid Build Coastguard Worker // // Because a string literal is converted to an `absl::ByString`, 116*9356374aSAndroid Build Coastguard Worker // // the following two splits are equivalent. 117*9356374aSAndroid Build Coastguard Worker // 118*9356374aSAndroid Build Coastguard Worker // std::vector<std::string> v1 = absl::StrSplit("a, b, c", ", "); 119*9356374aSAndroid Build Coastguard Worker // 120*9356374aSAndroid Build Coastguard Worker // using absl::ByString; 121*9356374aSAndroid Build Coastguard Worker // std::vector<std::string> v2 = absl::StrSplit("a, b, c", 122*9356374aSAndroid Build Coastguard Worker // ByString(", ")); 123*9356374aSAndroid Build Coastguard Worker // // v[0] == "a", v[1] == "b", v[2] == "c" 124*9356374aSAndroid Build Coastguard Worker class ByString { 125*9356374aSAndroid Build Coastguard Worker public: 126*9356374aSAndroid Build Coastguard Worker explicit ByString(absl::string_view sp); 127*9356374aSAndroid Build Coastguard Worker absl::string_view Find(absl::string_view text, size_t pos) const; 128*9356374aSAndroid Build Coastguard Worker 129*9356374aSAndroid Build Coastguard Worker private: 130*9356374aSAndroid Build Coastguard Worker const std::string delimiter_; 131*9356374aSAndroid Build Coastguard Worker }; 132*9356374aSAndroid Build Coastguard Worker 133*9356374aSAndroid Build Coastguard Worker // ByAsciiWhitespace 134*9356374aSAndroid Build Coastguard Worker // 135*9356374aSAndroid Build Coastguard Worker // A sub-string delimiter that splits by ASCII whitespace 136*9356374aSAndroid Build Coastguard Worker // (space, tab, vertical tab, 
formfeed, linefeed, or carriage return). 137*9356374aSAndroid Build Coastguard Worker // Note: you probably want to use absl::SkipEmpty() as well! 138*9356374aSAndroid Build Coastguard Worker // 139*9356374aSAndroid Build Coastguard Worker // This class is equivalent to ByAnyChar with ASCII whitespace chars. 140*9356374aSAndroid Build Coastguard Worker // 141*9356374aSAndroid Build Coastguard Worker // Example: 142*9356374aSAndroid Build Coastguard Worker // 143*9356374aSAndroid Build Coastguard Worker // std::vector<std::string> v = absl::StrSplit( 144*9356374aSAndroid Build Coastguard Worker // "a b\tc\n d \n", absl::ByAsciiWhitespace(), absl::SkipEmpty()); 145*9356374aSAndroid Build Coastguard Worker // // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d" 146*9356374aSAndroid Build Coastguard Worker class ByAsciiWhitespace { 147*9356374aSAndroid Build Coastguard Worker public: 148*9356374aSAndroid Build Coastguard Worker absl::string_view Find(absl::string_view text, size_t pos) const; 149*9356374aSAndroid Build Coastguard Worker }; 150*9356374aSAndroid Build Coastguard Worker 151*9356374aSAndroid Build Coastguard Worker // ByChar 152*9356374aSAndroid Build Coastguard Worker // 153*9356374aSAndroid Build Coastguard Worker // A single character delimiter. `ByChar` is functionally equivalent to a 154*9356374aSAndroid Build Coastguard Worker // 1-char string within a `ByString` delimiter, but slightly more efficient. 155*9356374aSAndroid Build Coastguard Worker // 156*9356374aSAndroid Build Coastguard Worker // Example: 157*9356374aSAndroid Build Coastguard Worker // 158*9356374aSAndroid Build Coastguard Worker // // Because a char literal is converted to a absl::ByChar, 159*9356374aSAndroid Build Coastguard Worker // // the following two splits are equivalent. 
160*9356374aSAndroid Build Coastguard Worker // std::vector<std::string> v1 = absl::StrSplit("a,b,c", ','); 161*9356374aSAndroid Build Coastguard Worker // using absl::ByChar; 162*9356374aSAndroid Build Coastguard Worker // std::vector<std::string> v2 = absl::StrSplit("a,b,c", ByChar(',')); 163*9356374aSAndroid Build Coastguard Worker // // v[0] == "a", v[1] == "b", v[2] == "c" 164*9356374aSAndroid Build Coastguard Worker // 165*9356374aSAndroid Build Coastguard Worker // `ByChar` is also the default delimiter if a single character is given 166*9356374aSAndroid Build Coastguard Worker // as the delimiter to `StrSplit()`. For example, the following calls are 167*9356374aSAndroid Build Coastguard Worker // equivalent: 168*9356374aSAndroid Build Coastguard Worker // 169*9356374aSAndroid Build Coastguard Worker // std::vector<std::string> v = absl::StrSplit("a-b", '-'); 170*9356374aSAndroid Build Coastguard Worker // 171*9356374aSAndroid Build Coastguard Worker // using absl::ByChar; 172*9356374aSAndroid Build Coastguard Worker // std::vector<std::string> v = absl::StrSplit("a-b", ByChar('-')); 173*9356374aSAndroid Build Coastguard Worker // 174*9356374aSAndroid Build Coastguard Worker class ByChar { 175*9356374aSAndroid Build Coastguard Worker public: ByChar(char c)176*9356374aSAndroid Build Coastguard Worker explicit ByChar(char c) : c_(c) {} 177*9356374aSAndroid Build Coastguard Worker absl::string_view Find(absl::string_view text, size_t pos) const; 178*9356374aSAndroid Build Coastguard Worker 179*9356374aSAndroid Build Coastguard Worker private: 180*9356374aSAndroid Build Coastguard Worker char c_; 181*9356374aSAndroid Build Coastguard Worker }; 182*9356374aSAndroid Build Coastguard Worker 183*9356374aSAndroid Build Coastguard Worker // ByAnyChar 184*9356374aSAndroid Build Coastguard Worker // 185*9356374aSAndroid Build Coastguard Worker // A delimiter that will match any of the given byte-sized characters within 186*9356374aSAndroid Build Coastguard Worker // its 
provided string. 187*9356374aSAndroid Build Coastguard Worker // 188*9356374aSAndroid Build Coastguard Worker // Note: this delimiter works with single-byte string data, but does not work 189*9356374aSAndroid Build Coastguard Worker // with variable-width encodings, such as UTF-8. 190*9356374aSAndroid Build Coastguard Worker // 191*9356374aSAndroid Build Coastguard Worker // Example: 192*9356374aSAndroid Build Coastguard Worker // 193*9356374aSAndroid Build Coastguard Worker // using absl::ByAnyChar; 194*9356374aSAndroid Build Coastguard Worker // std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",=")); 195*9356374aSAndroid Build Coastguard Worker // // v[0] == "a", v[1] == "b", v[2] == "c" 196*9356374aSAndroid Build Coastguard Worker // 197*9356374aSAndroid Build Coastguard Worker // If `ByAnyChar` is given the empty string, it behaves exactly like 198*9356374aSAndroid Build Coastguard Worker // `ByString` and matches each individual character in the input string. 199*9356374aSAndroid Build Coastguard Worker // 200*9356374aSAndroid Build Coastguard Worker class ByAnyChar { 201*9356374aSAndroid Build Coastguard Worker public: 202*9356374aSAndroid Build Coastguard Worker explicit ByAnyChar(absl::string_view sp); 203*9356374aSAndroid Build Coastguard Worker absl::string_view Find(absl::string_view text, size_t pos) const; 204*9356374aSAndroid Build Coastguard Worker 205*9356374aSAndroid Build Coastguard Worker private: 206*9356374aSAndroid Build Coastguard Worker const std::string delimiters_; 207*9356374aSAndroid Build Coastguard Worker }; 208*9356374aSAndroid Build Coastguard Worker 209*9356374aSAndroid Build Coastguard Worker // ByLength 210*9356374aSAndroid Build Coastguard Worker // 211*9356374aSAndroid Build Coastguard Worker // A delimiter for splitting into equal-length strings. The length argument to 212*9356374aSAndroid Build Coastguard Worker // the constructor must be greater than 0. 
//
// Note: this delimiter works with single-byte string data, but does not work
// with variable-width encodings, such as UTF-8.
//
// Example:
//
//   using absl::ByLength;
//   std::vector<std::string> v = absl::StrSplit("123456789", ByLength(3));
//
//   // v[0] == "123", v[1] == "456", v[2] == "789"
//
// Note that the string does not have to be a multiple of the fixed split
// length. In such a case, the last substring will be shorter.
226*9356374aSAndroid Build Coastguard Worker // 227*9356374aSAndroid Build Coastguard Worker // using absl::ByLength; 228*9356374aSAndroid Build Coastguard Worker // std::vector<std::string> v = absl::StrSplit("12345", ByLength(2)); 229*9356374aSAndroid Build Coastguard Worker // 230*9356374aSAndroid Build Coastguard Worker // // v[0] == "12", v[1] == "34", v[2] == "5" 231*9356374aSAndroid Build Coastguard Worker class ByLength { 232*9356374aSAndroid Build Coastguard Worker public: 233*9356374aSAndroid Build Coastguard Worker explicit ByLength(ptrdiff_t length); 234*9356374aSAndroid Build Coastguard Worker absl::string_view Find(absl::string_view text, size_t pos) const; 235*9356374aSAndroid Build Coastguard Worker 236*9356374aSAndroid Build Coastguard Worker private: 237*9356374aSAndroid Build Coastguard Worker const ptrdiff_t length_; 238*9356374aSAndroid Build Coastguard Worker }; 239*9356374aSAndroid Build Coastguard Worker 240*9356374aSAndroid Build Coastguard Worker namespace strings_internal { 241*9356374aSAndroid Build Coastguard Worker 242*9356374aSAndroid Build Coastguard Worker // A traits-like metafunction for selecting the default Delimiter object type 243*9356374aSAndroid Build Coastguard Worker // for a particular Delimiter type. The base case simply exposes type Delimiter 244*9356374aSAndroid Build Coastguard Worker // itself as the delimiter's Type. However, there are specializations for 245*9356374aSAndroid Build Coastguard Worker // string-like objects that map them to the ByString delimiter object. 246*9356374aSAndroid Build Coastguard Worker // This allows functions like absl::StrSplit() and absl::MaxSplits() to accept 247*9356374aSAndroid Build Coastguard Worker // string-like objects (e.g., ',') as delimiter arguments but they will be 248*9356374aSAndroid Build Coastguard Worker // treated as if a ByString delimiter was given. 
249*9356374aSAndroid Build Coastguard Worker template <typename Delimiter> 250*9356374aSAndroid Build Coastguard Worker struct SelectDelimiter { 251*9356374aSAndroid Build Coastguard Worker using type = Delimiter; 252*9356374aSAndroid Build Coastguard Worker }; 253*9356374aSAndroid Build Coastguard Worker 254*9356374aSAndroid Build Coastguard Worker template <> 255*9356374aSAndroid Build Coastguard Worker struct SelectDelimiter<char> { 256*9356374aSAndroid Build Coastguard Worker using type = ByChar; 257*9356374aSAndroid Build Coastguard Worker }; 258*9356374aSAndroid Build Coastguard Worker template <> 259*9356374aSAndroid Build Coastguard Worker struct SelectDelimiter<char*> { 260*9356374aSAndroid Build Coastguard Worker using type = ByString; 261*9356374aSAndroid Build Coastguard Worker }; 262*9356374aSAndroid Build Coastguard Worker template <> 263*9356374aSAndroid Build Coastguard Worker struct SelectDelimiter<const char*> { 264*9356374aSAndroid Build Coastguard Worker using type = ByString; 265*9356374aSAndroid Build Coastguard Worker }; 266*9356374aSAndroid Build Coastguard Worker template <> 267*9356374aSAndroid Build Coastguard Worker struct SelectDelimiter<absl::string_view> { 268*9356374aSAndroid Build Coastguard Worker using type = ByString; 269*9356374aSAndroid Build Coastguard Worker }; 270*9356374aSAndroid Build Coastguard Worker template <> 271*9356374aSAndroid Build Coastguard Worker struct SelectDelimiter<std::string> { 272*9356374aSAndroid Build Coastguard Worker using type = ByString; 273*9356374aSAndroid Build Coastguard Worker }; 274*9356374aSAndroid Build Coastguard Worker 275*9356374aSAndroid Build Coastguard Worker // Wraps another delimiter and sets a max number of matches for that delimiter. 
276*9356374aSAndroid Build Coastguard Worker template <typename Delimiter> 277*9356374aSAndroid Build Coastguard Worker class MaxSplitsImpl { 278*9356374aSAndroid Build Coastguard Worker public: 279*9356374aSAndroid Build Coastguard Worker MaxSplitsImpl(Delimiter delimiter, int limit) 280*9356374aSAndroid Build Coastguard Worker : delimiter_(delimiter), limit_(limit), count_(0) {} 281*9356374aSAndroid Build Coastguard Worker absl::string_view Find(absl::string_view text, size_t pos) { 282*9356374aSAndroid Build Coastguard Worker if (count_++ == limit_) { 283*9356374aSAndroid Build Coastguard Worker return absl::string_view(text.data() + text.size(), 284*9356374aSAndroid Build Coastguard Worker 0); // No more matches. 285*9356374aSAndroid Build Coastguard Worker } 286*9356374aSAndroid Build Coastguard Worker return delimiter_.Find(text, pos); 287*9356374aSAndroid Build Coastguard Worker } 288*9356374aSAndroid Build Coastguard Worker 289*9356374aSAndroid Build Coastguard Worker private: 290*9356374aSAndroid Build Coastguard Worker Delimiter delimiter_; 291*9356374aSAndroid Build Coastguard Worker const int limit_; 292*9356374aSAndroid Build Coastguard Worker int count_; 293*9356374aSAndroid Build Coastguard Worker }; 294*9356374aSAndroid Build Coastguard Worker 295*9356374aSAndroid Build Coastguard Worker } // namespace strings_internal 296*9356374aSAndroid Build Coastguard Worker 297*9356374aSAndroid Build Coastguard Worker // MaxSplits() 298*9356374aSAndroid Build Coastguard Worker // 299*9356374aSAndroid Build Coastguard Worker // A delimiter that limits the number of matches which can occur to the passed 300*9356374aSAndroid Build Coastguard Worker // `limit`. The last element in the returned collection will contain all 301*9356374aSAndroid Build Coastguard Worker // remaining unsplit pieces, which may contain instances of the delimiter. 302*9356374aSAndroid Build Coastguard Worker // The collection will contain at most `limit` + 1 elements. 
303*9356374aSAndroid Build Coastguard Worker // Example: 304*9356374aSAndroid Build Coastguard Worker // 305*9356374aSAndroid Build Coastguard Worker // using absl::MaxSplits; 306*9356374aSAndroid Build Coastguard Worker // std::vector<std::string> v = absl::StrSplit("a,b,c", MaxSplits(',', 1)); 307*9356374aSAndroid Build Coastguard Worker // 308*9356374aSAndroid Build Coastguard Worker // // v[0] == "a", v[1] == "b,c" 309*9356374aSAndroid Build Coastguard Worker template <typename Delimiter> 310*9356374aSAndroid Build Coastguard Worker inline strings_internal::MaxSplitsImpl< 311*9356374aSAndroid Build Coastguard Worker typename strings_internal::SelectDelimiter<Delimiter>::type> 312*9356374aSAndroid Build Coastguard Worker MaxSplits(Delimiter delimiter, int limit) { 313*9356374aSAndroid Build Coastguard Worker typedef 314*9356374aSAndroid Build Coastguard Worker typename strings_internal::SelectDelimiter<Delimiter>::type DelimiterType; 315*9356374aSAndroid Build Coastguard Worker return strings_internal::MaxSplitsImpl<DelimiterType>( 316*9356374aSAndroid Build Coastguard Worker DelimiterType(delimiter), limit); 317*9356374aSAndroid Build Coastguard Worker } 318*9356374aSAndroid Build Coastguard Worker 319*9356374aSAndroid Build Coastguard Worker //------------------------------------------------------------------------------ 320*9356374aSAndroid Build Coastguard Worker // Predicates 321*9356374aSAndroid Build Coastguard Worker //------------------------------------------------------------------------------ 322*9356374aSAndroid Build Coastguard Worker // 323*9356374aSAndroid Build Coastguard Worker // Predicates filter the results of a `StrSplit()` by determining whether or not 324*9356374aSAndroid Build Coastguard Worker // a resultant element is included in the result set. A predicate may be passed 325*9356374aSAndroid Build Coastguard Worker // as an optional third argument to the `StrSplit()` function. 
//
// Predicates are unary functions (or functors) that take a single
// `absl::string_view` argument and return a bool indicating whether the
// argument should be included (`true`) or excluded (`false`).
//
// Predicates are useful when filtering out empty substrings. By default, empty
// substrings may be returned by `StrSplit()`, which is similar to the way split
// functions work in other programming languages.

// AllowEmpty()
//
// Always returns `true`, indicating that all strings--including empty
// strings--should be included in the split output. This predicate is not
// strictly needed because this is the default behavior of `StrSplit()`;
// however, it might be useful at some call sites to make the intent explicit.
341*9356374aSAndroid Build Coastguard Worker // 342*9356374aSAndroid Build Coastguard Worker // Example: 343*9356374aSAndroid Build Coastguard Worker // 344*9356374aSAndroid Build Coastguard Worker // std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', AllowEmpty()); 345*9356374aSAndroid Build Coastguard Worker // 346*9356374aSAndroid Build Coastguard Worker // // v[0] == " a ", v[1] == " ", v[2] == "", v[3] = "b", v[4] == "" 347*9356374aSAndroid Build Coastguard Worker struct AllowEmpty { 348*9356374aSAndroid Build Coastguard Worker bool operator()(absl::string_view) const { return true; } 349*9356374aSAndroid Build Coastguard Worker }; 350*9356374aSAndroid Build Coastguard Worker 351*9356374aSAndroid Build Coastguard Worker // SkipEmpty() 352*9356374aSAndroid Build Coastguard Worker // 353*9356374aSAndroid Build Coastguard Worker // Returns `false` if the given `absl::string_view` is empty, indicating that 354*9356374aSAndroid Build Coastguard Worker // `StrSplit()` should omit the empty string. 355*9356374aSAndroid Build Coastguard Worker // 356*9356374aSAndroid Build Coastguard Worker // Example: 357*9356374aSAndroid Build Coastguard Worker // 358*9356374aSAndroid Build Coastguard Worker // std::vector<std::string> v = absl::StrSplit(",a,,b,", ',', SkipEmpty()); 359*9356374aSAndroid Build Coastguard Worker // 360*9356374aSAndroid Build Coastguard Worker // // v[0] == "a", v[1] == "b" 361*9356374aSAndroid Build Coastguard Worker // 362*9356374aSAndroid Build Coastguard Worker // Note: `SkipEmpty()` does not consider a string containing only whitespace 363*9356374aSAndroid Build Coastguard Worker // to be empty. To skip such whitespace as well, use the `SkipWhitespace()` 364*9356374aSAndroid Build Coastguard Worker // predicate. 
365*9356374aSAndroid Build Coastguard Worker struct SkipEmpty { 366*9356374aSAndroid Build Coastguard Worker bool operator()(absl::string_view sp) const { return !sp.empty(); } 367*9356374aSAndroid Build Coastguard Worker }; 368*9356374aSAndroid Build Coastguard Worker 369*9356374aSAndroid Build Coastguard Worker // SkipWhitespace() 370*9356374aSAndroid Build Coastguard Worker // 371*9356374aSAndroid Build Coastguard Worker // Returns `false` if the given `absl::string_view` is empty *or* contains only 372*9356374aSAndroid Build Coastguard Worker // whitespace, indicating that `StrSplit()` should omit the string. 373*9356374aSAndroid Build Coastguard Worker // 374*9356374aSAndroid Build Coastguard Worker // Example: 375*9356374aSAndroid Build Coastguard Worker // 376*9356374aSAndroid Build Coastguard Worker // std::vector<std::string> v = absl::StrSplit(" a , ,,b,", 377*9356374aSAndroid Build Coastguard Worker // ',', SkipWhitespace()); 378*9356374aSAndroid Build Coastguard Worker // // v[0] == " a ", v[1] == "b" 379*9356374aSAndroid Build Coastguard Worker // 380*9356374aSAndroid Build Coastguard Worker // // SkipEmpty() would return whitespace elements 381*9356374aSAndroid Build Coastguard Worker // std::vector<std::string> v = absl::StrSplit(" a , ,,b,", ',', SkipEmpty()); 382*9356374aSAndroid Build Coastguard Worker // // v[0] == " a ", v[1] == " ", v[2] == "b" 383*9356374aSAndroid Build Coastguard Worker struct SkipWhitespace { 384*9356374aSAndroid Build Coastguard Worker bool operator()(absl::string_view sp) const { 385*9356374aSAndroid Build Coastguard Worker sp = absl::StripAsciiWhitespace(sp); 386*9356374aSAndroid Build Coastguard Worker return !sp.empty(); 387*9356374aSAndroid Build Coastguard Worker } 388*9356374aSAndroid Build Coastguard Worker }; 389*9356374aSAndroid Build Coastguard Worker 390*9356374aSAndroid Build Coastguard Worker template <typename T> 391*9356374aSAndroid Build Coastguard Worker using EnableSplitIfString = 392*9356374aSAndroid 
Build Coastguard Worker typename std::enable_if<std::is_same<T, std::string>::value || 393*9356374aSAndroid Build Coastguard Worker std::is_same<T, const std::string>::value, 394*9356374aSAndroid Build Coastguard Worker int>::type; 395*9356374aSAndroid Build Coastguard Worker 396*9356374aSAndroid Build Coastguard Worker //------------------------------------------------------------------------------ 397*9356374aSAndroid Build Coastguard Worker // StrSplit() 398*9356374aSAndroid Build Coastguard Worker //------------------------------------------------------------------------------ 399*9356374aSAndroid Build Coastguard Worker 400*9356374aSAndroid Build Coastguard Worker // StrSplit() 401*9356374aSAndroid Build Coastguard Worker // 402*9356374aSAndroid Build Coastguard Worker // Splits a given string based on the provided `Delimiter` object, returning the 403*9356374aSAndroid Build Coastguard Worker // elements within the type specified by the caller. Optionally, you may pass a 404*9356374aSAndroid Build Coastguard Worker // `Predicate` to `StrSplit()` indicating whether to include or exclude the 405*9356374aSAndroid Build Coastguard Worker // resulting element within the final result set. (See the overviews for 406*9356374aSAndroid Build Coastguard Worker // Delimiters and Predicates above.) 
//
// Example:
//
//   std::vector<std::string> v = absl::StrSplit("a,b,c,d", ',');
//   // v[0] == "a", v[1] == "b", v[2] == "c", v[3] == "d"
//
// You can also provide an explicit `Delimiter` object:
//
// Example:
//
//   using absl::ByAnyChar;
//   std::vector<std::string> v = absl::StrSplit("a,b=c", ByAnyChar(",="));
//   // v[0] == "a", v[1] == "b", v[2] == "c"
//
// See above for more information on delimiters.
//
// By default, empty strings are included in the result set.
You can optionally 424*9356374aSAndroid Build Coastguard Worker // include a third `Predicate` argument to apply a test for whether the 425*9356374aSAndroid Build Coastguard Worker // resultant element should be included in the result set: 426*9356374aSAndroid Build Coastguard Worker // 427*9356374aSAndroid Build Coastguard Worker // Example: 428*9356374aSAndroid Build Coastguard Worker // 429*9356374aSAndroid Build Coastguard Worker // std::vector<std::string> v = absl::StrSplit(" a , ,,b,", 430*9356374aSAndroid Build Coastguard Worker // ',', SkipWhitespace()); 431*9356374aSAndroid Build Coastguard Worker // // v[0] == " a ", v[1] == "b" 432*9356374aSAndroid Build Coastguard Worker // 433*9356374aSAndroid Build Coastguard Worker // See above for more information on predicates. 434*9356374aSAndroid Build Coastguard Worker // 435*9356374aSAndroid Build Coastguard Worker //------------------------------------------------------------------------------ 436*9356374aSAndroid Build Coastguard Worker // StrSplit() Return Types 437*9356374aSAndroid Build Coastguard Worker //------------------------------------------------------------------------------ 438*9356374aSAndroid Build Coastguard Worker // 439*9356374aSAndroid Build Coastguard Worker // The `StrSplit()` function adapts the returned collection to the collection 440*9356374aSAndroid Build Coastguard Worker // specified by the caller (e.g. `std::vector` above). The returned collections 441*9356374aSAndroid Build Coastguard Worker // may contain `std::string`, `absl::string_view` (in which case the original 442*9356374aSAndroid Build Coastguard Worker // string being split must ensure that it outlives the collection), or any 443*9356374aSAndroid Build Coastguard Worker // object that can be explicitly created from an `absl::string_view`. 
This 444*9356374aSAndroid Build Coastguard Worker // behavior works for: 445*9356374aSAndroid Build Coastguard Worker // 446*9356374aSAndroid Build Coastguard Worker // 1) All standard STL containers including `std::vector`, `std::list`, 447*9356374aSAndroid Build Coastguard Worker // `std::deque`, `std::set`, `std::multiset`, `std::map`, and `std::multimap` 448*9356374aSAndroid Build Coastguard Worker // 2) `std::pair` (which is not actually a container). See below. 449*9356374aSAndroid Build Coastguard Worker // 450*9356374aSAndroid Build Coastguard Worker // Example: 451*9356374aSAndroid Build Coastguard Worker // 452*9356374aSAndroid Build Coastguard Worker // // The results are returned as `absl::string_view` objects. Note that we 453*9356374aSAndroid Build Coastguard Worker // // have to ensure that the input string outlives any results. 454*9356374aSAndroid Build Coastguard Worker // std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ','); 455*9356374aSAndroid Build Coastguard Worker // 456*9356374aSAndroid Build Coastguard Worker // // Stores results in a std::set<std::string>, which also performs 457*9356374aSAndroid Build Coastguard Worker // // de-duplication and orders the elements in ascending order. 458*9356374aSAndroid Build Coastguard Worker // std::set<std::string> a = absl::StrSplit("b,a,c,a,b", ','); 459*9356374aSAndroid Build Coastguard Worker // // a == {"a", "b", "c"} 460*9356374aSAndroid Build Coastguard Worker // 461*9356374aSAndroid Build Coastguard Worker // // `StrSplit()` can be used within a range-based for loop, in which case 462*9356374aSAndroid Build Coastguard Worker // // each element will be of type `absl::string_view`. 
463*9356374aSAndroid Build Coastguard Worker // std::vector<std::string> v; 464*9356374aSAndroid Build Coastguard Worker // for (const auto sv : absl::StrSplit("a,b,c", ',')) { 465*9356374aSAndroid Build Coastguard Worker // if (sv != "b") v.emplace_back(sv); 466*9356374aSAndroid Build Coastguard Worker // } 467*9356374aSAndroid Build Coastguard Worker // // v[0] == "a", v[1] == "c" 468*9356374aSAndroid Build Coastguard Worker // 469*9356374aSAndroid Build Coastguard Worker // // Stores results in a map. The map implementation assumes that the input 470*9356374aSAndroid Build Coastguard Worker // // is provided as a series of key/value pairs. For example, the 0th element 471*9356374aSAndroid Build Coastguard Worker // // resulting from the split will be stored as a key to the 1st element. If 472*9356374aSAndroid Build Coastguard Worker // // an odd number of elements are resolved, the last element is paired with 473*9356374aSAndroid Build Coastguard Worker // // a default-constructed value (e.g., empty string). 474*9356374aSAndroid Build Coastguard Worker // std::map<std::string, std::string> m = absl::StrSplit("a,b,c", ','); 475*9356374aSAndroid Build Coastguard Worker // // m["a"] == "b", m["c"] == "" // last component value equals "" 476*9356374aSAndroid Build Coastguard Worker // 477*9356374aSAndroid Build Coastguard Worker // Splitting to `std::pair` is an interesting case because it can hold only two 478*9356374aSAndroid Build Coastguard Worker // elements and is not a collection type. When splitting to a `std::pair` the 479*9356374aSAndroid Build Coastguard Worker // first two split strings become the `std::pair` `.first` and `.second` 480*9356374aSAndroid Build Coastguard Worker // members, respectively. The remaining split substrings are discarded. If there 481*9356374aSAndroid Build Coastguard Worker // are fewer than two split substrings, the empty string is used for the 482*9356374aSAndroid Build Coastguard Worker // corresponding `std::pair` member. 
483*9356374aSAndroid Build Coastguard Worker // 484*9356374aSAndroid Build Coastguard Worker // Example: 485*9356374aSAndroid Build Coastguard Worker // 486*9356374aSAndroid Build Coastguard Worker // // Stores first two split strings as the members in a std::pair. 487*9356374aSAndroid Build Coastguard Worker // std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ','); 488*9356374aSAndroid Build Coastguard Worker // // p.first == "a", p.second == "b" // "c" is omitted. 489*9356374aSAndroid Build Coastguard Worker // 490*9356374aSAndroid Build Coastguard Worker // The `StrSplit()` function can be used multiple times to perform more 491*9356374aSAndroid Build Coastguard Worker // complicated splitting logic, such as intelligently parsing key-value pairs. 492*9356374aSAndroid Build Coastguard Worker // 493*9356374aSAndroid Build Coastguard Worker // Example: 494*9356374aSAndroid Build Coastguard Worker // 495*9356374aSAndroid Build Coastguard Worker // // The input string "a=b=c,d=e,f=,g" becomes 496*9356374aSAndroid Build Coastguard Worker // // { "a" => "b=c", "d" => "e", "f" => "", "g" => "" } 497*9356374aSAndroid Build Coastguard Worker // std::map<std::string, std::string> m; 498*9356374aSAndroid Build Coastguard Worker // for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) { 499*9356374aSAndroid Build Coastguard Worker // m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1))); 500*9356374aSAndroid Build Coastguard Worker // } 501*9356374aSAndroid Build Coastguard Worker // EXPECT_EQ("b=c", m.find("a")->second); 502*9356374aSAndroid Build Coastguard Worker // EXPECT_EQ("e", m.find("d")->second); 503*9356374aSAndroid Build Coastguard Worker // EXPECT_EQ("", m.find("f")->second); 504*9356374aSAndroid Build Coastguard Worker // EXPECT_EQ("", m.find("g")->second); 505*9356374aSAndroid Build Coastguard Worker // 506*9356374aSAndroid Build Coastguard Worker // WARNING: Due to a legacy bug that is maintained for backward compatibility, 
507*9356374aSAndroid Build Coastguard Worker // splitting the following empty string_views produces different results: 508*9356374aSAndroid Build Coastguard Worker // 509*9356374aSAndroid Build Coastguard Worker // absl::StrSplit(absl::string_view(""), '-'); // {""} 510*9356374aSAndroid Build Coastguard Worker // absl::StrSplit(absl::string_view(), '-'); // {}, but should be {""} 511*9356374aSAndroid Build Coastguard Worker // 512*9356374aSAndroid Build Coastguard Worker // Try not to depend on this distinction because the bug may one day be fixed. 513*9356374aSAndroid Build Coastguard Worker template <typename Delimiter> 514*9356374aSAndroid Build Coastguard Worker strings_internal::Splitter< 515*9356374aSAndroid Build Coastguard Worker typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty, 516*9356374aSAndroid Build Coastguard Worker absl::string_view> 517*9356374aSAndroid Build Coastguard Worker StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d) { 518*9356374aSAndroid Build Coastguard Worker using DelimiterType = 519*9356374aSAndroid Build Coastguard Worker typename strings_internal::SelectDelimiter<Delimiter>::type; 520*9356374aSAndroid Build Coastguard Worker return strings_internal::Splitter<DelimiterType, AllowEmpty, 521*9356374aSAndroid Build Coastguard Worker absl::string_view>( 522*9356374aSAndroid Build Coastguard Worker text.value(), DelimiterType(d), AllowEmpty()); 523*9356374aSAndroid Build Coastguard Worker } 524*9356374aSAndroid Build Coastguard Worker 525*9356374aSAndroid Build Coastguard Worker template <typename Delimiter, typename StringType, 526*9356374aSAndroid Build Coastguard Worker EnableSplitIfString<StringType> = 0> 527*9356374aSAndroid Build Coastguard Worker strings_internal::Splitter< 528*9356374aSAndroid Build Coastguard Worker typename strings_internal::SelectDelimiter<Delimiter>::type, AllowEmpty, 529*9356374aSAndroid Build Coastguard Worker std::string> 530*9356374aSAndroid Build Coastguard 
Worker StrSplit(StringType&& text, Delimiter d) { 531*9356374aSAndroid Build Coastguard Worker using DelimiterType = 532*9356374aSAndroid Build Coastguard Worker typename strings_internal::SelectDelimiter<Delimiter>::type; 533*9356374aSAndroid Build Coastguard Worker return strings_internal::Splitter<DelimiterType, AllowEmpty, std::string>( 534*9356374aSAndroid Build Coastguard Worker std::move(text), DelimiterType(d), AllowEmpty()); 535*9356374aSAndroid Build Coastguard Worker } 536*9356374aSAndroid Build Coastguard Worker 537*9356374aSAndroid Build Coastguard Worker template <typename Delimiter, typename Predicate> 538*9356374aSAndroid Build Coastguard Worker strings_internal::Splitter< 539*9356374aSAndroid Build Coastguard Worker typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate, 540*9356374aSAndroid Build Coastguard Worker absl::string_view> 541*9356374aSAndroid Build Coastguard Worker StrSplit(strings_internal::ConvertibleToStringView text, Delimiter d, 542*9356374aSAndroid Build Coastguard Worker Predicate p) { 543*9356374aSAndroid Build Coastguard Worker using DelimiterType = 544*9356374aSAndroid Build Coastguard Worker typename strings_internal::SelectDelimiter<Delimiter>::type; 545*9356374aSAndroid Build Coastguard Worker return strings_internal::Splitter<DelimiterType, Predicate, 546*9356374aSAndroid Build Coastguard Worker absl::string_view>( 547*9356374aSAndroid Build Coastguard Worker text.value(), DelimiterType(std::move(d)), std::move(p)); 548*9356374aSAndroid Build Coastguard Worker } 549*9356374aSAndroid Build Coastguard Worker 550*9356374aSAndroid Build Coastguard Worker template <typename Delimiter, typename Predicate, typename StringType, 551*9356374aSAndroid Build Coastguard Worker EnableSplitIfString<StringType> = 0> 552*9356374aSAndroid Build Coastguard Worker strings_internal::Splitter< 553*9356374aSAndroid Build Coastguard Worker typename strings_internal::SelectDelimiter<Delimiter>::type, Predicate, 
554*9356374aSAndroid Build Coastguard Worker std::string> 555*9356374aSAndroid Build Coastguard Worker StrSplit(StringType&& text, Delimiter d, Predicate p) { 556*9356374aSAndroid Build Coastguard Worker using DelimiterType = 557*9356374aSAndroid Build Coastguard Worker typename strings_internal::SelectDelimiter<Delimiter>::type; 558*9356374aSAndroid Build Coastguard Worker return strings_internal::Splitter<DelimiterType, Predicate, std::string>( 559*9356374aSAndroid Build Coastguard Worker std::move(text), DelimiterType(d), std::move(p)); 560*9356374aSAndroid Build Coastguard Worker } 561*9356374aSAndroid Build Coastguard Worker 562*9356374aSAndroid Build Coastguard Worker ABSL_NAMESPACE_END 563*9356374aSAndroid Build Coastguard Worker } // namespace absl 564*9356374aSAndroid Build Coastguard Worker 565*9356374aSAndroid Build Coastguard Worker #endif // ABSL_STRINGS_STR_SPLIT_H_ 566