// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

// This file declares INTERNAL parts of the Split API that are inline/templated
// or otherwise need to be available at compile time. The main abstractions
// defined in here are
//
//   - ConvertibleToStringView
//   - SplitIterator<>
//   - Splitter<>
//
// DO NOT INCLUDE THIS FILE DIRECTLY. Use this file by including
// absl/strings/str_split.h.
//
// IWYU pragma: private, include "absl/strings/str_split.h"

#ifndef ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_
#define ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_

#include <array>
#include <initializer_list>
#include <iterator>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>

#include "absl/base/macros.h"
#include "absl/base/port.h"
#include "absl/meta/type_traits.h"
#include "absl/strings/string_view.h"

#ifdef _GLIBCXX_DEBUG
#include "absl/strings/internal/stl_type_traits.h"
#endif  // _GLIBCXX_DEBUG

namespace absl {
ABSL_NAMESPACE_BEGIN
namespace strings_internal {

// This class is implicitly constructible from everything that absl::string_view
// is implicitly constructible from, except for rvalue strings.  This means it
// can be used as a function parameter in places where passing a temporary
// string might cause memory lifetime issues.
57 class ConvertibleToStringView { 58 public: ConvertibleToStringView(const char * s)59 ConvertibleToStringView(const char* s) // NOLINT(runtime/explicit) 60 : value_(s) {} ConvertibleToStringView(char * s)61 ConvertibleToStringView(char* s) : value_(s) {} // NOLINT(runtime/explicit) ConvertibleToStringView(absl::string_view s)62 ConvertibleToStringView(absl::string_view s) // NOLINT(runtime/explicit) 63 : value_(s) {} ConvertibleToStringView(const std::string & s)64 ConvertibleToStringView(const std::string& s) // NOLINT(runtime/explicit) 65 : value_(s) {} 66 67 // Disable conversion from rvalue strings. 68 ConvertibleToStringView(std::string&& s) = delete; 69 ConvertibleToStringView(const std::string&& s) = delete; 70 value()71 absl::string_view value() const { return value_; } 72 73 private: 74 absl::string_view value_; 75 }; 76 77 // An iterator that enumerates the parts of a string from a Splitter. The text 78 // to be split, the Delimiter, and the Predicate are all taken from the given 79 // Splitter object. Iterators may only be compared if they refer to the same 80 // Splitter instance. 81 // 82 // This class is NOT part of the public splitting API. 83 template <typename Splitter> 84 class SplitIterator { 85 public: 86 using iterator_category = std::input_iterator_tag; 87 using value_type = absl::string_view; 88 using difference_type = ptrdiff_t; 89 using pointer = const value_type*; 90 using reference = const value_type&; 91 92 enum State { kInitState, kLastState, kEndState }; SplitIterator(State state,const Splitter * splitter)93 SplitIterator(State state, const Splitter* splitter) 94 : pos_(0), 95 state_(state), 96 splitter_(splitter), 97 delimiter_(splitter->delimiter()), 98 predicate_(splitter->predicate()) { 99 // Hack to maintain backward compatibility. 
This one block makes it so an 100 // empty absl::string_view whose .data() happens to be nullptr behaves 101 // *differently* from an otherwise empty absl::string_view whose .data() is 102 // not nullptr. This is an undesirable difference in general, but this 103 // behavior is maintained to avoid breaking existing code that happens to 104 // depend on this old behavior/bug. Perhaps it will be fixed one day. The 105 // difference in behavior is as follows: 106 // Split(absl::string_view(""), '-'); // {""} 107 // Split(absl::string_view(), '-'); // {} 108 if (splitter_->text().data() == nullptr) { 109 state_ = kEndState; 110 pos_ = splitter_->text().size(); 111 return; 112 } 113 114 if (state_ == kEndState) { 115 pos_ = splitter_->text().size(); 116 } else { 117 ++(*this); 118 } 119 } 120 at_end()121 bool at_end() const { return state_ == kEndState; } 122 123 reference operator*() const { return curr_; } 124 pointer operator->() const { return &curr_; } 125 126 SplitIterator& operator++() { 127 do { 128 if (state_ == kLastState) { 129 state_ = kEndState; 130 return *this; 131 } 132 const absl::string_view text = splitter_->text(); 133 const absl::string_view d = delimiter_.Find(text, pos_); 134 if (d.data() == text.data() + text.size()) state_ = kLastState; 135 curr_ = text.substr(pos_, 136 static_cast<size_t>(d.data() - (text.data() + pos_))); 137 pos_ += curr_.size() + d.size(); 138 } while (!predicate_(curr_)); 139 return *this; 140 } 141 142 SplitIterator operator++(int) { 143 SplitIterator old(*this); 144 ++(*this); 145 return old; 146 } 147 148 friend bool operator==(const SplitIterator& a, const SplitIterator& b) { 149 return a.state_ == b.state_ && a.pos_ == b.pos_; 150 } 151 152 friend bool operator!=(const SplitIterator& a, const SplitIterator& b) { 153 return !(a == b); 154 } 155 156 private: 157 size_t pos_; 158 State state_; 159 absl::string_view curr_; 160 const Splitter* splitter_; 161 typename Splitter::DelimiterType delimiter_; 162 typename 
Splitter::PredicateType predicate_; 163 }; 164 165 // HasMappedType<T>::value is true iff there exists a type T::mapped_type. 166 template <typename T, typename = void> 167 struct HasMappedType : std::false_type {}; 168 template <typename T> 169 struct HasMappedType<T, absl::void_t<typename T::mapped_type>> 170 : std::true_type {}; 171 172 // HasValueType<T>::value is true iff there exists a type T::value_type. 173 template <typename T, typename = void> 174 struct HasValueType : std::false_type {}; 175 template <typename T> 176 struct HasValueType<T, absl::void_t<typename T::value_type>> : std::true_type { 177 }; 178 179 // HasConstIterator<T>::value is true iff there exists a type T::const_iterator. 180 template <typename T, typename = void> 181 struct HasConstIterator : std::false_type {}; 182 template <typename T> 183 struct HasConstIterator<T, absl::void_t<typename T::const_iterator>> 184 : std::true_type {}; 185 186 // HasEmplace<T>::value is true iff there exists a method T::emplace(). 187 template <typename T, typename = void> 188 struct HasEmplace : std::false_type {}; 189 template <typename T> 190 struct HasEmplace<T, absl::void_t<decltype(std::declval<T>().emplace())>> 191 : std::true_type {}; 192 193 // IsInitializerList<T>::value is true iff T is an std::initializer_list. More 194 // details below in Splitter<> where this is used. 195 std::false_type IsInitializerListDispatch(...); // default: No 196 template <typename T> 197 std::true_type IsInitializerListDispatch(std::initializer_list<T>*); 198 template <typename T> 199 struct IsInitializerList 200 : decltype(IsInitializerListDispatch(static_cast<T*>(nullptr))) {}; 201 202 // A SplitterIsConvertibleTo<C>::type alias exists iff the specified condition 203 // is true for type 'C'. 
204 // 205 // Restricts conversion to container-like types (by testing for the presence of 206 // a const_iterator member type) and also to disable conversion to an 207 // std::initializer_list (which also has a const_iterator). Otherwise, code 208 // compiled in C++11 will get an error due to ambiguous conversion paths (in 209 // C++11 std::vector<T>::operator= is overloaded to take either a std::vector<T> 210 // or an std::initializer_list<T>). 211 212 template <typename C, bool has_value_type, bool has_mapped_type> 213 struct SplitterIsConvertibleToImpl : std::false_type {}; 214 215 template <typename C> 216 struct SplitterIsConvertibleToImpl<C, true, false> 217 : std::is_constructible<typename C::value_type, absl::string_view> {}; 218 219 template <typename C> 220 struct SplitterIsConvertibleToImpl<C, true, true> 221 : absl::conjunction< 222 std::is_constructible<typename C::key_type, absl::string_view>, 223 std::is_constructible<typename C::mapped_type, absl::string_view>> {}; 224 225 template <typename C> 226 struct SplitterIsConvertibleTo 227 : SplitterIsConvertibleToImpl< 228 C, 229 #ifdef _GLIBCXX_DEBUG 230 !IsStrictlyBaseOfAndConvertibleToSTLContainer<C>::value && 231 #endif // _GLIBCXX_DEBUG 232 !IsInitializerList< 233 typename std::remove_reference<C>::type>::value && 234 HasValueType<C>::value && HasConstIterator<C>::value, 235 HasMappedType<C>::value> { 236 }; 237 238 // This class implements the range that is returned by absl::StrSplit(). This 239 // class has templated conversion operators that allow it to be implicitly 240 // converted to a variety of types that the caller may have specified on the 241 // left-hand side of an assignment. 242 // 243 // The main interface for interacting with this class is through its implicit 244 // conversion operators. However, this class may also be used like a container 245 // in that it has .begin() and .end() member functions. It may also be used 246 // within a range-for loop. 
247 // 248 // Output containers can be collections of any type that is constructible from 249 // an absl::string_view. 250 // 251 // An Predicate functor may be supplied. This predicate will be used to filter 252 // the split strings: only strings for which the predicate returns true will be 253 // kept. A Predicate object is any unary functor that takes an absl::string_view 254 // and returns bool. 255 // 256 // The StringType parameter can be either string_view or string, depending on 257 // whether the Splitter refers to a string stored elsewhere, or if the string 258 // resides inside the Splitter itself. 259 template <typename Delimiter, typename Predicate, typename StringType> 260 class Splitter { 261 public: 262 using DelimiterType = Delimiter; 263 using PredicateType = Predicate; 264 using const_iterator = strings_internal::SplitIterator<Splitter>; 265 using value_type = typename std::iterator_traits<const_iterator>::value_type; 266 267 Splitter(StringType input_text, Delimiter d, Predicate p) 268 : text_(std::move(input_text)), 269 delimiter_(std::move(d)), 270 predicate_(std::move(p)) {} 271 272 absl::string_view text() const { return text_; } 273 const Delimiter& delimiter() const { return delimiter_; } 274 const Predicate& predicate() const { return predicate_; } 275 276 // Range functions that iterate the split substrings as absl::string_view 277 // objects. These methods enable a Splitter to be used in a range-based for 278 // loop. 279 const_iterator begin() const { return {const_iterator::kInitState, this}; } 280 const_iterator end() const { return {const_iterator::kEndState, this}; } 281 282 // An implicit conversion operator that is restricted to only those containers 283 // that the splitter is convertible to. 
284 template <typename Container, 285 typename = typename std::enable_if< 286 SplitterIsConvertibleTo<Container>::value>::type> 287 operator Container() const { // NOLINT(runtime/explicit) 288 return ConvertToContainer<Container, typename Container::value_type, 289 HasMappedType<Container>::value>()(*this); 290 } 291 292 // Returns a pair with its .first and .second members set to the first two 293 // strings returned by the begin() iterator. Either/both of .first and .second 294 // will be constructed with empty strings if the iterator doesn't have a 295 // corresponding value. 296 template <typename First, typename Second> 297 operator std::pair<First, Second>() const { // NOLINT(runtime/explicit) 298 absl::string_view first, second; 299 auto it = begin(); 300 if (it != end()) { 301 first = *it; 302 if (++it != end()) { 303 second = *it; 304 } 305 } 306 return {First(first), Second(second)}; 307 } 308 309 private: 310 // ConvertToContainer is a functor converting a Splitter to the requested 311 // Container of ValueType. It is specialized below to optimize splitting to 312 // certain combinations of Container and ValueType. 313 // 314 // This base template handles the generic case of storing the split results in 315 // the requested non-map-like container and converting the split substrings to 316 // the requested type. 317 template <typename Container, typename ValueType, bool is_map = false> 318 struct ConvertToContainer { 319 Container operator()(const Splitter& splitter) const { 320 Container c; 321 auto it = std::inserter(c, c.end()); 322 for (const auto& sp : splitter) { 323 *it++ = ValueType(sp); 324 } 325 return c; 326 } 327 }; 328 329 // Partial specialization for a std::vector<absl::string_view>. 330 // 331 // Optimized for the common case of splitting to a 332 // std::vector<absl::string_view>. In this case we first split the results to 333 // a small array of absl::string_view on the stack, to reduce reallocations. 
334 template <typename A> 335 struct ConvertToContainer<std::vector<absl::string_view, A>, 336 absl::string_view, false> { 337 std::vector<absl::string_view, A> operator()( 338 const Splitter& splitter) const { 339 struct raw_view { 340 const char* data; 341 size_t size; 342 operator absl::string_view() const { // NOLINT(runtime/explicit) 343 return {data, size}; 344 } 345 }; 346 std::vector<absl::string_view, A> v; 347 std::array<raw_view, 16> ar; 348 for (auto it = splitter.begin(); !it.at_end();) { 349 size_t index = 0; 350 do { 351 ar[index].data = it->data(); 352 ar[index].size = it->size(); 353 ++it; 354 } while (++index != ar.size() && !it.at_end()); 355 v.insert(v.end(), ar.begin(), ar.begin() + index); 356 } 357 return v; 358 } 359 }; 360 361 // Partial specialization for a std::vector<std::string>. 362 // 363 // Optimized for the common case of splitting to a std::vector<std::string>. 364 // In this case we first split the results to a std::vector<absl::string_view> 365 // so the returned std::vector<std::string> can have space reserved to avoid 366 // std::string moves. 367 template <typename A> 368 struct ConvertToContainer<std::vector<std::string, A>, std::string, false> { 369 std::vector<std::string, A> operator()(const Splitter& splitter) const { 370 const std::vector<absl::string_view> v = splitter; 371 return std::vector<std::string, A>(v.begin(), v.end()); 372 } 373 }; 374 375 // Partial specialization for containers of pairs (e.g., maps). 376 // 377 // The algorithm is to insert a new pair into the map for each even-numbered 378 // item, with the even-numbered item as the key with a default-constructed 379 // value. Each odd-numbered item will then be assigned to the last pair's 380 // value. 
381 template <typename Container, typename First, typename Second> 382 struct ConvertToContainer<Container, std::pair<const First, Second>, true> { 383 using iterator = typename Container::iterator; 384 385 Container operator()(const Splitter& splitter) const { 386 Container m; 387 iterator it; 388 bool insert = true; 389 for (const absl::string_view sv : splitter) { 390 if (insert) { 391 it = InsertOrEmplace(&m, sv); 392 } else { 393 it->second = Second(sv); 394 } 395 insert = !insert; 396 } 397 return m; 398 } 399 400 // Inserts the key and an empty value into the map, returning an iterator to 401 // the inserted item. We use emplace() if available, otherwise insert(). 402 template <typename M> 403 static absl::enable_if_t<HasEmplace<M>::value, iterator> InsertOrEmplace( 404 M* m, absl::string_view key) { 405 // Use piecewise_construct to support old versions of gcc in which pair 406 // constructor can't otherwise construct string from string_view. 407 return ToIter(m->emplace(std::piecewise_construct, std::make_tuple(key), 408 std::tuple<>())); 409 } 410 template <typename M> 411 static absl::enable_if_t<!HasEmplace<M>::value, iterator> InsertOrEmplace( 412 M* m, absl::string_view key) { 413 return ToIter(m->insert(std::make_pair(First(key), Second("")))); 414 } 415 416 static iterator ToIter(std::pair<iterator, bool> pair) { 417 return pair.first; 418 } 419 static iterator ToIter(iterator iter) { return iter; } 420 }; 421 422 StringType text_; 423 Delimiter delimiter_; 424 Predicate predicate_; 425 }; 426 427 } // namespace strings_internal 428 ABSL_NAMESPACE_END 429 } // namespace absl 430 431 #endif // ABSL_STRINGS_INTERNAL_STR_SPLIT_INTERNAL_H_ 432