1 // Copyright 2017 The Abseil Authors.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // https://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14
15 #include "absl/strings/str_split.h"
16
17 #include <cstddef>
18 #include <cstdint>
19 #include <deque>
20 #include <initializer_list>
21 #include <list>
22 #include <map>
23 #include <memory>
24 #include <set>
25 #include <string>
26 #include <unordered_map>
27 #include <unordered_set>
28 #include <utility>
29 #include <vector>
30
31 #include "gmock/gmock.h"
32 #include "gtest/gtest.h"
33 #include "absl/base/macros.h"
34 #include "absl/container/btree_map.h"
35 #include "absl/container/btree_set.h"
36 #include "absl/container/flat_hash_map.h"
37 #include "absl/container/node_hash_map.h"
38 #include "absl/strings/string_view.h"
39
40 namespace {
41
42 using ::testing::ElementsAre;
43 using ::testing::IsEmpty;
44 using ::testing::Pair;
45 using ::testing::UnorderedElementsAre;
46
TEST(Split, TraitsTest) {
  // Everything in this test is verified at compile time; there are no runtime
  // assertions. Each static_assert pins whether SplitterIsConvertibleTo
  // reports the given type as a valid conversion target.
  using absl::strings_internal::SplitterIsConvertibleTo;

  // Targets that must be rejected.
  static_assert(!SplitterIsConvertibleTo<int>::value, "");
  static_assert(!SplitterIsConvertibleTo<std::string>::value, "");
  static_assert(!SplitterIsConvertibleTo<std::vector<int>>::value, "");
  static_assert(!SplitterIsConvertibleTo<std::map<int, std::string>>::value,
                "");
  static_assert(!SplitterIsConvertibleTo<std::map<std::string, int>>::value,
                "");

  // Targets that must be accepted.
  static_assert(SplitterIsConvertibleTo<std::vector<std::string>>::value, "");
  static_assert(
      SplitterIsConvertibleTo<std::vector<absl::string_view>>::value, "");
  static_assert(
      SplitterIsConvertibleTo<std::map<std::string, std::string>>::value, "");
  static_assert(SplitterIsConvertibleTo<
                    std::map<absl::string_view, absl::string_view>>::value,
                "");
}
74
75 // This tests the overall split API, which is made up of the absl::StrSplit()
76 // function and the Delimiter objects in the absl:: namespace.
// This TEST macro is deliberately kept outside of the absl namespace (it only
// sits in this file's anonymous namespace), so every absl:: name has to be
// fully specified, just like callers will need to do.
TEST(Split, APIExamples) {
  // A tour of the public absl::StrSplit() API. Every scoped block below is an
  // independent example. Map-like results are verified as whole containers so
  // that no example dereferences an unchecked find() result.
  {
    // Passes string delimiter. Assumes the default of ByString.
    std::vector<std::string> v = absl::StrSplit("a,b,c", ",");  // NOLINT
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));

    // Equivalent to...
    using absl::ByString;
    v = absl::StrSplit("a,b,c", ByString(","));
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));

    // Equivalent to...
    EXPECT_THAT(absl::StrSplit("a,b,c", ByString(",")),
                ElementsAre("a", "b", "c"));
  }

  {
    // Same as above, but using a single character as the delimiter.
    std::vector<std::string> v = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));

    // Equivalent to...
    using absl::ByChar;
    v = absl::StrSplit("a,b,c", ByChar(','));
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Uses the Literal string "=>" as the delimiter.
    const std::vector<std::string> v = absl::StrSplit("a=>b=>c", "=>");
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // The substrings are returned as string_views, eliminating copying.
    std::vector<absl::string_view> v = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Leading and trailing empty substrings.
    std::vector<std::string> v = absl::StrSplit(",a,b,c,", ',');
    EXPECT_THAT(v, ElementsAre("", "a", "b", "c", ""));
  }

  {
    // Splits on a delimiter that is not found.
    std::vector<std::string> v = absl::StrSplit("abc", ',');
    EXPECT_THAT(v, ElementsAre("abc"));
  }

  {
    // Splits the input string into individual characters by using an empty
    // string as the delimiter.
    std::vector<std::string> v = absl::StrSplit("abc", "");
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Splits string data with embedded NUL characters, using NUL as the
    // delimiter. A simple delimiter of "\0" doesn't work because strlen() will
    // say that's the empty string when constructing the absl::string_view
    // delimiter. Instead, a non-empty string containing NUL can be used as the
    // delimiter.
    std::string embedded_nulls("a\0b\0c", 5);
    std::string null_delim("\0", 1);
    std::vector<std::string> v = absl::StrSplit(embedded_nulls, null_delim);
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Stores first two split strings as the members in a std::pair.
    std::pair<std::string, std::string> p = absl::StrSplit("a,b,c", ',');
    EXPECT_EQ("a", p.first);
    EXPECT_EQ("b", p.second);
    // "c" is omitted because std::pair can hold only two elements.
  }

  {
    // Results stored in std::set<std::string>
    std::set<std::string> v = absl::StrSplit("a,b,c,a,b,c,a,b,c", ',');
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Uses a non-const char* delimiter.
    char a[] = ",";
    char* d = a + 0;
    std::vector<std::string> v = absl::StrSplit("a,b,c", d);
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Results split using either of , or ;
    using absl::ByAnyChar;
    std::vector<std::string> v = absl::StrSplit("a,b;c", ByAnyChar(",;"));
    EXPECT_THAT(v, ElementsAre("a", "b", "c"));
  }

  {
    // Uses the SkipWhitespace predicate.
    using absl::SkipWhitespace;
    std::vector<std::string> v =
        absl::StrSplit(" a , ,,b,", ',', SkipWhitespace());
    EXPECT_THAT(v, ElementsAre(" a ", "b"));
  }

  {
    // Uses the ByLength delimiter.
    using absl::ByLength;
    std::vector<std::string> v = absl::StrSplit("abcdefg", ByLength(3));
    EXPECT_THAT(v, ElementsAre("abc", "def", "g"));
  }

  {
    // Different forms of initialization / conversion.
    std::vector<std::string> v1 = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(v1, ElementsAre("a", "b", "c"));
    std::vector<std::string> v2(absl::StrSplit("a,b,c", ','));
    EXPECT_THAT(v2, ElementsAre("a", "b", "c"));
    auto v3 = std::vector<std::string>(absl::StrSplit("a,b,c", ','));
    EXPECT_THAT(v3, ElementsAre("a", "b", "c"));
    v3 = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(v3, ElementsAre("a", "b", "c"));
  }

  {
    // Results stored in a std::map. The key "a" appears twice in the input;
    // the map ends up with two entries and "a" holds the later value.
    std::map<std::string, std::string> m = absl::StrSplit("a,1,b,2,a,3", ',');
    EXPECT_THAT(m, ElementsAre(Pair("a", "3"), Pair("b", "2")));
  }

  {
    // Results stored in a std::multimap. Both "a" records are kept.
    //
    // The whole sequence is matched instead of walking from m.find("a"):
    // find() on a multimap may return any of the equivalent entries, and a
    // failed lookup would otherwise be dereferenced unchecked.
    std::multimap<std::string, std::string> m =
        absl::StrSplit("a,1,b,2,a,3", ',');
    EXPECT_THAT(m, ElementsAre(Pair("a", "1"), Pair("a", "3"), Pair("b", "2")));
  }

  {
    // Demonstrates use in a range-based for loop in C++11.
    std::string s = "x,x,x,x,x,x,x";
    for (absl::string_view sp : absl::StrSplit(s, ',')) {
      EXPECT_EQ("x", sp);
    }
  }

  {
    // Demonstrates use with a Predicate in a range-based for loop.
    using absl::SkipWhitespace;
    std::string s = " ,x,,x,,x,x,x,,";
    for (absl::string_view sp : absl::StrSplit(s, ',', SkipWhitespace())) {
      EXPECT_EQ("x", sp);
    }
  }

  {
    // Demonstrates a "smart" split to std::map using two separate calls to
    // absl::StrSplit. One call to split the records, and another call to split
    // the keys and values. This also uses the Limit delimiter so that the
    // std::string "a=b=c" will split to "a" -> "b=c".
    std::map<std::string, std::string> m;
    for (absl::string_view sp : absl::StrSplit("a=b=c,d=e,f=,g", ',')) {
      m.insert(absl::StrSplit(sp, absl::MaxSplits('=', 1)));
    }
    // Matching the full map avoids dereferencing m.find(...) without first
    // checking it against m.end().
    EXPECT_THAT(m, ElementsAre(Pair("a", "b=c"), Pair("d", "e"), Pair("f", ""),
                               Pair("g", "")));
  }
}
258
259 //
260 // Tests for SplitIterator
261 //
262
TEST(SplitIterator, Basics) {
  // Walks a two-piece split by hand, exercising dereference, operator->,
  // pre-increment and post-increment on the splitter's iterator.
  auto splitter = absl::StrSplit("a,b", ',');
  auto it = splitter.begin();
  auto end = splitter.end();

  // ASSERT rather than EXPECT: if the iterator is already at the end, the
  // dereference on the next line must not run.
  ASSERT_NE(it, end);
  EXPECT_EQ("a", *it);  // tests dereference
  ++it;                 // tests preincrement
  ASSERT_NE(it, end);
  EXPECT_EQ("b",
            std::string(it->data(), it->size()));  // tests dereference as ptr
  it++;                                            // tests postincrement
  EXPECT_EQ(it, end);
}
277
278 // Simple Predicate to skip a particular string.
279 class Skip {
280 public:
Skip(const std::string & s)281 explicit Skip(const std::string& s) : s_(s) {}
operator ()(absl::string_view sp)282 bool operator()(absl::string_view sp) { return sp != s_; }
283
284 private:
285 std::string s_;
286 };
287
TEST(SplitIterator, Predicate) {
  // Same walk as the basic iterator test, but with a Skip("b") predicate so
  // that the middle piece never shows up.
  auto splitter = absl::StrSplit("a,b,c", ',', Skip("b"));
  auto it = splitter.begin();
  auto end = splitter.end();

  // ASSERT rather than EXPECT: an iterator that already equals `end` must not
  // be dereferenced by the following line.
  ASSERT_NE(it, end);
  EXPECT_EQ("a", *it);  // tests dereference
  ++it;                 // tests preincrement -- "b" should be skipped here.
  ASSERT_NE(it, end);
  EXPECT_EQ("c",
            std::string(it->data(), it->size()));  // tests dereference as ptr
  it++;                                            // tests postincrement
  EXPECT_EQ(it, end);
}
302
TEST(SplitIterator, EdgeCases) {
  // Expected input and output, assuming a delimiter of ','
  struct {
    std::string in;
    std::vector<std::string> expect;
  } specs[] = {
      {"", {""}},
      {"foo", {"foo"}},
      {",", {"", ""}},
      {",foo", {"", "foo"}},
      {"foo,", {"foo", ""}},
      {",foo,", {"", "foo", ""}},
      {"foo,bar", {"foo", "bar"}},
  };

  for (const auto& spec : specs) {
    SCOPED_TRACE(spec.in);
    auto splitter = absl::StrSplit(spec.in, ',');
    auto it = splitter.begin();
    auto end = splitter.end();
    for (const auto& expected : spec.expect) {
      // Stop immediately if the split produced fewer pieces than expected;
      // otherwise the next line would dereference and advance `end`.
      ASSERT_NE(it, end);
      EXPECT_EQ(expected, *it++);
    }
    // No extra pieces may remain once every expected one was consumed.
    EXPECT_EQ(it, end);
  }
}
330
TEST(Splitter, Const) {
  // A const-qualified splitter must still be usable as a range.
  const auto const_splitter = absl::StrSplit("a,b,c", ',');
  EXPECT_THAT(const_splitter, ElementsAre("a", "b", "c"));
}
335
TEST(Split, EmptyAndNull) {
  // Attention: a null absl::string_view and an empty absl::string_view compare
  // equal, yet splitting them gives different results. That is likely
  // surprising and undesirable, but a small "hack" in str_split_internal.h
  // keeps it this way for backward compatibility. Should that behavior ever be
  // changed/fixed, this test has to be updated along with it.
  const absl::string_view empty_view("");
  const absl::string_view null_view = absl::string_view();

  // Empty input: a single empty piece.
  EXPECT_THAT(absl::StrSplit(empty_view, '-'), ElementsAre(""));
  // Null input: no pieces at all.
  EXPECT_THAT(absl::StrSplit(null_view, '-'), ElementsAre());
}
346
TEST(SplitIterator, EqualityAsEndCondition) {
  auto splitter = absl::StrSplit("a,b,c", ',');
  auto cursor = splitter.begin();

  // Copy the begin iterator and step the copy forward twice so that it refers
  // to "c" in the input text.
  auto stop = cursor;
  ++stop;
  ++stop;
  EXPECT_EQ("c", *stop);

  // `stop` is not the end iterator, but it is used as the loop's terminating
  // condition anyway. The loop therefore only finishes correctly if equality
  // between two non-end SplitIterators works.
  std::vector<absl::string_view> seen;
  while (cursor != stop) {
    seen.push_back(*cursor);
    ++cursor;
  }
  EXPECT_THAT(seen, ElementsAre("a", "b"));
}
367
368 //
369 // Tests for Splitter
370 //
371
TEST(Splitter, RangeIterators) {
  // Collects every piece through a range-based for loop over a named splitter.
  auto splitter = absl::StrSplit("a,b,c", ',');
  std::vector<absl::string_view> collected;
  for (absl::string_view piece : splitter) {
    collected.push_back(piece);
  }
  EXPECT_THAT(collected, ElementsAre("a", "b", "c"));
}
380
381 // Some template functions for use in testing conversion operators
382 template <typename ContainerType, typename Splitter>
TestConversionOperator(const Splitter & splitter)383 void TestConversionOperator(const Splitter& splitter) {
384 ContainerType output = splitter;
385 EXPECT_THAT(output, UnorderedElementsAre("a", "b", "c", "d"));
386 }
387
388 template <typename MapType, typename Splitter>
TestMapConversionOperator(const Splitter & splitter)389 void TestMapConversionOperator(const Splitter& splitter) {
390 MapType m = splitter;
391 EXPECT_THAT(m, UnorderedElementsAre(Pair("a", "b"), Pair("c", "d")));
392 }
393
template <typename FirstType, typename SecondType, typename Splitter>
void TestPairConversionOperator(const Splitter& splitter) {
  // Copy-initializes a std::pair from the splitter and compares it with the
  // pair ("a", "b").
  using PairType = std::pair<FirstType, SecondType>;
  const PairType converted = splitter;
  const PairType expected("a", "b");
  EXPECT_EQ(converted, expected);
}
399
TEST(Splitter, ConversionOperator) {
  // One splitter over "a,b,c,d" is converted to every container type listed
  // below; the helper templates perform the conversion and the check.
  using SV = absl::string_view;
  using Str = std::string;

  auto abcd = absl::StrSplit("a,b,c,d", ',');

  // Sequence- and set-like targets.
  TestConversionOperator<std::vector<SV>>(abcd);
  TestConversionOperator<std::vector<Str>>(abcd);
  TestConversionOperator<std::list<SV>>(abcd);
  TestConversionOperator<std::list<Str>>(abcd);
  TestConversionOperator<std::deque<SV>>(abcd);
  TestConversionOperator<std::deque<Str>>(abcd);
  TestConversionOperator<std::set<SV>>(abcd);
  TestConversionOperator<std::set<Str>>(abcd);
  TestConversionOperator<std::multiset<SV>>(abcd);
  TestConversionOperator<std::multiset<Str>>(abcd);
  TestConversionOperator<absl::btree_set<SV>>(abcd);
  TestConversionOperator<absl::btree_set<Str>>(abcd);
  TestConversionOperator<absl::btree_multiset<SV>>(abcd);
  TestConversionOperator<absl::btree_multiset<Str>>(abcd);
  TestConversionOperator<std::unordered_set<Str>>(abcd);

  // Map-like targets: every key/value combination of string_view and
  // std::string that the original test covers.
  TestMapConversionOperator<std::map<SV, SV>>(abcd);
  TestMapConversionOperator<std::map<SV, Str>>(abcd);
  TestMapConversionOperator<std::map<Str, SV>>(abcd);
  TestMapConversionOperator<std::map<Str, Str>>(abcd);
  TestMapConversionOperator<std::multimap<SV, SV>>(abcd);
  TestMapConversionOperator<std::multimap<SV, Str>>(abcd);
  TestMapConversionOperator<std::multimap<Str, SV>>(abcd);
  TestMapConversionOperator<std::multimap<Str, Str>>(abcd);
  TestMapConversionOperator<absl::btree_map<SV, SV>>(abcd);
  TestMapConversionOperator<absl::btree_map<SV, Str>>(abcd);
  TestMapConversionOperator<absl::btree_map<Str, SV>>(abcd);
  TestMapConversionOperator<absl::btree_map<Str, Str>>(abcd);
  TestMapConversionOperator<absl::btree_multimap<SV, SV>>(abcd);
  TestMapConversionOperator<absl::btree_multimap<SV, Str>>(abcd);
  TestMapConversionOperator<absl::btree_multimap<Str, SV>>(abcd);
  TestMapConversionOperator<absl::btree_multimap<Str, Str>>(abcd);
  TestMapConversionOperator<std::unordered_map<Str, Str>>(abcd);
  TestMapConversionOperator<absl::node_hash_map<SV, SV>>(abcd);
  TestMapConversionOperator<absl::node_hash_map<SV, Str>>(abcd);
  TestMapConversionOperator<absl::node_hash_map<Str, SV>>(abcd);
  TestMapConversionOperator<absl::flat_hash_map<SV, SV>>(abcd);
  TestMapConversionOperator<absl::flat_hash_map<SV, Str>>(abcd);
  TestMapConversionOperator<absl::flat_hash_map<Str, SV>>(abcd);

  // std::pair targets.
  TestPairConversionOperator<SV, SV>(abcd);
  TestPairConversionOperator<SV, Str>(abcd);
  TestPairConversionOperator<Str, SV>(abcd);
  TestPairConversionOperator<Str, Str>(abcd);
}
471
472 // A few additional tests for conversion to std::pair. This conversion is
473 // different from others because a std::pair always has exactly two elements:
474 // .first and .second. The split has to work even when the split has
475 // less-than, equal-to, and more-than 2 strings.
TEST(Splitter, ToPair) {
  // Each case converts a split to a pair of strings and checks both members.
  using StringPair = std::pair<std::string, std::string>;

  {
    // Empty input: both members are empty.
    StringPair result = absl::StrSplit("", ',');
    EXPECT_THAT(result, Pair("", ""));
  }

  {
    // A single piece only fills .first.
    StringPair result = absl::StrSplit("a", ',');
    EXPECT_THAT(result, Pair("a", ""));
  }

  {
    // An empty first piece followed by a second piece.
    StringPair result = absl::StrSplit(",b", ',');
    EXPECT_THAT(result, Pair("", "b"));
  }

  {
    // Exactly two pieces.
    StringPair result = absl::StrSplit("a,b", ',');
    EXPECT_THAT(result, Pair("a", "b"));
  }

  {
    // More than two pieces: "c" is omitted.
    StringPair result = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(result, Pair("a", "b"));
  }
}
513
TEST(Splitter, Predicates) {
  // The same input is split with each of the predicates below.
  static const char kTestChars[] = ",a, ,b,";
  using absl::AllowEmpty;
  using absl::SkipEmpty;
  using absl::SkipWhitespace;

  {
    // Without a predicate, empty pieces are kept.
    auto plain = absl::StrSplit(kTestChars, ',');
    std::vector<std::string> pieces = plain;
    EXPECT_THAT(pieces, ElementsAre("", "a", " ", "b", ""));
  }

  {
    // AllowEmpty keeps empty pieces as well.
    auto with_allow_empty = absl::StrSplit(kTestChars, ',', AllowEmpty());
    std::vector<std::string> allow_empty_pieces = with_allow_empty;
    EXPECT_THAT(allow_empty_pieces, ElementsAre("", "a", " ", "b", ""));

    // ...and gives the same result as passing no predicate at all.
    auto without_predicate = absl::StrSplit(kTestChars, ',');
    std::vector<std::string> no_predicate_pieces = without_predicate;
    EXPECT_EQ(allow_empty_pieces, no_predicate_pieces);
  }

  {
    // SkipEmpty drops empty pieces but keeps the all-whitespace one.
    auto with_skip_empty = absl::StrSplit(kTestChars, ',', SkipEmpty());
    std::vector<std::string> pieces = with_skip_empty;
    EXPECT_THAT(pieces, ElementsAre("a", " ", "b"));
  }

  {
    // SkipWhitespace drops both empty and all-whitespace pieces.
    auto with_skip_ws = absl::StrSplit(kTestChars, ',', SkipWhitespace());
    std::vector<std::string> pieces = with_skip_ws;
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }
}
553
554 //
555 // Tests for StrSplit()
556 //
557
TEST(Split, Basics) {
  {
    // The result is discarded on purpose; the call merely has to compile and
    // run.
    absl::StrSplit("a,b,c", ',');
  }

  {
    // Pieces as string_views.
    std::vector<absl::string_view> views = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(views, ElementsAre("a", "b", "c"));
  }

  {
    // Pieces as owned strings.
    std::vector<std::string> strings = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(strings, ElementsAre("a", "b", "c"));
  }

  {
    // Assignment to already-constructed containers. This requires a little
    // extra work with C++11 because of overloads with initializer_list.
    std::vector<std::string> strings;
    strings = absl::StrSplit("a,b,c", ',');
    EXPECT_THAT(strings, ElementsAre("a", "b", "c"));

    std::map<std::string, std::string> ordered;
    ordered = absl::StrSplit("a,b,c", ',');
    EXPECT_EQ(2, ordered.size());

    std::unordered_map<std::string, std::string> hashed;
    hashed = absl::StrSplit("a,b,c", ',');
    EXPECT_EQ(2, hashed.size());
  }
}
590
ReturnStringView()591 absl::string_view ReturnStringView() { return "Hello World"; }
// Returns "Hello World" as a pointer to const char.
const char* ReturnConstCharP() {
  const char* const greeting = "Hello World";
  return greeting;
}
// Returns "Hello World" as a pointer to non-const char. The pointer refers to
// a string literal, so callers must only read through it.
char* ReturnCharP() {
  const char* const greeting = "Hello World";
  return const_cast<char*>(greeting);
}
594
TEST(Split, AcceptsCertainTemporaries) {
  // The text to split comes straight from a function's return value, once for
  // each of the three return types.
  std::vector<std::string> words;

  // absl::string_view
  words = absl::StrSplit(ReturnStringView(), ' ');
  EXPECT_THAT(words, ElementsAre("Hello", "World"));

  // const char*
  words = absl::StrSplit(ReturnConstCharP(), ' ');
  EXPECT_THAT(words, ElementsAre("Hello", "World"));

  // char*
  words = absl::StrSplit(ReturnCharP(), ' ');
  EXPECT_THAT(words, ElementsAre("Hello", "World"));
}
604
TEST(Split, Temporary) {
  // The input is longer than the SSO length. If the splitter merely kept a
  // reference to the temporary string's contents, it would read freed memory
  // after the temporary is destroyed rather than dead on-stack memory.
  const char input[] = "a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u";
  EXPECT_LT(sizeof(std::string), ABSL_ARRAYSIZE(input))
      << "Input should be larger than fits on the stack.";

  // This happens more often in C++11 as part of a range-based for loop.
  auto splitter = absl::StrSplit(std::string(input), ',');
  char next_letter = 'a';
  for (absl::string_view letter : splitter) {
    EXPECT_EQ(std::string(1, next_letter), letter);
    ++next_letter;
  }
  // Letters 'a' through 'u' were consumed, so the counter stops at 'v'.
  EXPECT_EQ('v', next_letter);

  // The same scenario once more with a second, independent splitter.
  auto std_splitter = absl::StrSplit(std::string(input), ',');
  next_letter = 'a';
  for (absl::string_view letter : std_splitter) {
    EXPECT_EQ(std::string(1, next_letter), letter);
    ++next_letter;
  }
  EXPECT_EQ('v', next_letter);
}
631
// Copy-constructs a new T from `value` with operator new and returns it owned
// by a std::unique_ptr.
template <typename T>
static std::unique_ptr<T> CopyToHeap(const T& value) {
  std::unique_ptr<T> heap_copy(new T(value));
  return heap_copy;
}
636
TEST(Split, LvalueCaptureIsCopyable) {
  // A splitter built from an lvalue string is copied from the heap to the
  // stack; the copy must still work after the heap instance is destroyed.
  std::string text = "a,b";
  auto on_heap = CopyToHeap(absl::StrSplit(text, ','));
  auto on_stack = *on_heap;
  on_heap.reset();
  std::vector<std::string> pieces = on_stack;
  EXPECT_THAT(pieces, ElementsAre("a", "b"));
}
645
TEST(Split, TemporaryCaptureIsCopyable) {
  // Same as the lvalue variant, except that the splitter is built from a
  // temporary std::string.
  auto on_heap = CopyToHeap(absl::StrSplit(std::string("a,b"), ','));
  auto on_stack = *on_heap;
  on_heap.reset();
  std::vector<std::string> pieces = on_stack;
  EXPECT_THAT(pieces, ElementsAre("a", "b"));
}
653
TEST(Split, SplitterIsCopyableAndMoveable) {
  auto first = absl::StrSplit("foo", '-');

  // Mostly a compile-time check: each of the four special operations below
  // has to be well-formed for the splitter type.
  auto second = first;            // Copy construct
  auto third = std::move(first);  // Move construct
  second = third;                 // Copy assign
  third = std::move(second);      // Move assign

  // The splitter that ends up holding the state still yields the input.
  EXPECT_THAT(third, ElementsAre("foo"));
}
665
TEST(Split, StringDelimiter) {
  // The same comma delimiter is supplied as three different types.
  {
    // As a char.
    std::vector<absl::string_view> pieces = absl::StrSplit("a,b", ',');
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }

  {
    // As a std::string.
    std::vector<absl::string_view> pieces =
        absl::StrSplit("a,b", std::string(","));
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }

  {
    // As an absl::string_view.
    std::vector<absl::string_view> pieces =
        absl::StrSplit("a,b", absl::string_view(","));
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }
}
683
684 #if !defined(__cpp_char8_t)
685 #if defined(__clang__)
686 #pragma clang diagnostic push
687 #pragma clang diagnostic ignored "-Wc++2a-compat"
688 #endif
TEST(Split, UTF8) {
  // Splitting when the text and/or the delimiter contain UTF-8 data.
  std::string utf8_word = u8"\u03BA\u1F79\u03C3\u03BC\u03B5";
  {
    // UTF-8 text, ASCII delimiter.
    std::string text = "a," + utf8_word;
    std::vector<absl::string_view> pieces = absl::StrSplit(text, ',');
    EXPECT_THAT(pieces, ElementsAre("a", utf8_word));
  }

  {
    // UTF-8 text, and a delimiter that itself contains the UTF-8 word.
    std::string text = "a," + utf8_word + ",b";
    std::string utf8_delimiter = "," + utf8_word + ",";
    std::vector<absl::string_view> pieces =
        absl::StrSplit(text, utf8_delimiter);
    EXPECT_THAT(pieces, ElementsAre("a", "b"));
  }

  {
    // UTF-8 text, ByAnyChar over ASCII characters.
    std::vector<absl::string_view> pieces =
        absl::StrSplit(u8"Foo h\u00E4llo th\u4E1Ere", absl::ByAnyChar(" \t"));
    EXPECT_THAT(pieces, ElementsAre("Foo", u8"h\u00E4llo", u8"th\u4E1Ere"));
  }
}
715 #if defined(__clang__)
716 #pragma clang diagnostic pop
717 #endif
718 #endif // !defined(__cpp_char8_t)
719
TEST(Split, EmptyStringDelimiter) {
  // Inputs split with "" as the delimiter, next to the pieces expected back.
  const struct {
    const char* input;
    std::vector<std::string> expected;
  } kCases[] = {
      {"", {""}},
      {"a", {"a"}},
      {"ab", {"a", "b"}},
      {"a b", {"a", " ", "b"}},
  };

  for (const auto& test_case : kCases) {
    SCOPED_TRACE(test_case.input);
    std::vector<std::string> pieces = absl::StrSplit(test_case.input, "");
    EXPECT_EQ(pieces, test_case.expected);
  }
}
741
TEST(Split, SubstrDelimiter) {
  // Inputs split on the two-character delimiter "//", next to the pieces
  // expected back.
  const struct {
    const char* input;
    std::vector<absl::string_view> expected;
  } kCases[] = {
      {"", {""}},
      {"//", {"", ""}},
      {"ab", {"ab"}},
      {"ab//", {"ab", ""}},
      {"ab/", {"ab/"}},
      {"a/b", {"a/b"}},
      {"a//b", {"a", "b"}},
      {"a///b", {"a", "/b"}},
      {"a////b", {"a", "", "b"}},
  };

  absl::string_view delim("//");
  std::vector<absl::string_view> results;
  for (const auto& test_case : kCases) {
    SCOPED_TRACE(test_case.input);
    // Assigned to the reused vector, as in a hand-written sequence of checks.
    results = absl::StrSplit(test_case.input, delim);
    EXPECT_EQ(results, test_case.expected);
  }
}
773
TEST(Split, EmptyResults) {
  // One input split on a single character, next to the pieces expected back.
  const struct {
    const char* input;
    char delimiter;
    std::vector<absl::string_view> expected;
  } kCases[] = {
      {"", '#', {""}},
      {"#", '#', {"", ""}},
      {"#cd", '#', {"", "cd"}},
      {"ab#cd#", '#', {"ab", "cd", ""}},
      {"ab##cd", '#', {"ab", "", "cd"}},
      {"ab##", '#', {"ab", "", ""}},
      {"ab#ab#", '#', {"ab", "ab", ""}},
      {"aaaa", 'a', {"", "", "", "", ""}},
  };

  std::vector<absl::string_view> results;
  for (const auto& test_case : kCases) {
    SCOPED_TRACE(test_case.input);
    results = absl::StrSplit(test_case.input, test_case.delimiter);
    EXPECT_EQ(results, test_case.expected);
  }

  // With SkipEmpty, splitting the empty string leaves nothing at all.
  results = absl::StrSplit("", '#', absl::SkipEmpty());
  EXPECT_THAT(results, IsEmpty());
}
804
805 template <typename Delimiter>
IsFoundAtStartingPos(absl::string_view text,Delimiter d,size_t starting_pos,int expected_pos)806 static bool IsFoundAtStartingPos(absl::string_view text, Delimiter d,
807 size_t starting_pos, int expected_pos) {
808 absl::string_view found = d.Find(text, starting_pos);
809 return found.data() != text.data() + text.size() &&
810 expected_pos == found.data() - text.data();
811 }
812
813 // Helper function for testing Delimiter objects. Returns true if the given
814 // Delimiter is found in the given string at the given position. This function
815 // tests two cases:
// 1. The actual text given, starting at position 0
817 // 2. The text given with leading padding that should be ignored
818 template <typename Delimiter>
IsFoundAt(absl::string_view text,Delimiter d,int expected_pos)819 static bool IsFoundAt(absl::string_view text, Delimiter d, int expected_pos) {
820 const std::string leading_text = ",x,y,z,";
821 return IsFoundAtStartingPos(text, d, 0, expected_pos) &&
822 IsFoundAtStartingPos(leading_text + std::string(text), d,
823 leading_text.length(),
824 expected_pos + leading_text.length());
825 }
826
827 //
828 // Tests for ByString
829 //
830
831 // Tests using any delimiter that represents a single comma.
832 template <typename Delimiter>
TestComma(Delimiter d)833 void TestComma(Delimiter d) {
834 EXPECT_TRUE(IsFoundAt(",", d, 0));
835 EXPECT_TRUE(IsFoundAt("a,", d, 1));
836 EXPECT_TRUE(IsFoundAt(",b", d, 0));
837 EXPECT_TRUE(IsFoundAt("a,b", d, 1));
838 EXPECT_TRUE(IsFoundAt("a,b,", d, 1));
839 EXPECT_TRUE(IsFoundAt("a,b,c", d, 1));
840 EXPECT_FALSE(IsFoundAt("", d, -1));
841 EXPECT_FALSE(IsFoundAt(" ", d, -1));
842 EXPECT_FALSE(IsFoundAt("a", d, -1));
843 EXPECT_FALSE(IsFoundAt("a b c", d, -1));
844 EXPECT_FALSE(IsFoundAt("a;b;c", d, -1));
845 EXPECT_FALSE(IsFoundAt(";", d, -1));
846 }
847
TEST(Delimiter, ByString) {
  using absl::ByString;

  // As a temporary...
  TestComma(ByString(","));

  // ...and as a named variable.
  ByString comma_delimiter(",");
  TestComma(comma_delimiter);

  // absl::string_view::find() reports the empty string ("") at position 0,
  // which the first expectation below demonstrates. If the ByString delimiter
  // returned position 0 too, the SplitIterator code would loop forever. To
  // avoid that, the empty string is special-cased so that it always returns
  // the item at position 1.
  absl::string_view haystack("abc");
  EXPECT_EQ(0, haystack.find(""));  // "" is found at position 0

  ByString empty_delimiter("");
  EXPECT_FALSE(IsFoundAt("", empty_delimiter, 0));
  EXPECT_FALSE(IsFoundAt("a", empty_delimiter, 0));
  EXPECT_TRUE(IsFoundAt("ab", empty_delimiter, 1));
  EXPECT_TRUE(IsFoundAt("abc", empty_delimiter, 1));
}
869
TEST(Split, ByChar) {
  using absl::ByChar;

  // As a temporary...
  TestComma(ByChar(','));

  // ...and as a named variable.
  ByChar comma_delimiter(',');
  TestComma(comma_delimiter);
}
878
879 //
880 // Tests for ByAnyChar
881 //
882
TEST(Delimiter, ByAnyChar) {
  using absl::ByAnyChar;

  // A text to search, with the offset at which the delimiter must be found.
  struct Probe {
    const char* text;
    int expected_pos;
  };

  // A single delimiter character.
  ByAnyChar one_delim(",");
  const Probe kOneDelimFound[] = {{",", 0}, {"a,", 1}, {"a,b", 1}, {",b", 0}};
  for (const Probe& probe : kOneDelimFound) {
    SCOPED_TRACE(probe.text);
    EXPECT_TRUE(IsFoundAt(probe.text, one_delim, probe.expected_pos));
  }
  const char* const kOneDelimNotFound[] = {"", " ", "a", "a;b;c", ";"};
  for (const char* text : kOneDelimNotFound) {
    SCOPED_TRACE(text);
    EXPECT_FALSE(IsFoundAt(text, one_delim, -1));
  }

  // Two delimiter characters; whichever comes first in the text is found.
  ByAnyChar two_delims(",;");
  const Probe kTwoDelimsFound[] = {
      {",", 0},   {";", 0},   {",;", 0},  {";,", 0},   {",;b", 0},
      {";,b", 0}, {"a;,", 1}, {"a,;", 1}, {"a;,b", 1}, {"a,;b", 1},
  };
  for (const Probe& probe : kTwoDelimsFound) {
    SCOPED_TRACE(probe.text);
    EXPECT_TRUE(IsFoundAt(probe.text, two_delims, probe.expected_pos));
  }
  const char* const kTwoDelimsNotFound[] = {"", " ", "a", "a=b=c", "="};
  for (const char* text : kTwoDelimsNotFound) {
    SCOPED_TRACE(text);
    EXPECT_FALSE(IsFoundAt(text, two_delims, -1));
  }

  // ByAnyChar behaves just like ByString when given a delimiter of empty
  // string. That is, it always returns a zero-length absl::string_view
  // referring to the item at position 1, not position 0.
  ByAnyChar empty_delim("");
  EXPECT_FALSE(IsFoundAt("", empty_delim, 0));
  EXPECT_FALSE(IsFoundAt("a", empty_delim, 0));
  EXPECT_TRUE(IsFoundAt("ab", empty_delim, 1));
  EXPECT_TRUE(IsFoundAt("abc", empty_delim, 1));
}
926
927 //
928 // Tests for ByAsciiWhitespace
929 //
TEST(Split, ByAsciiWhitespace) {
  // Every single ASCII whitespace character acts as its own delimiter, so
  // adjacent whitespace characters (and leading/trailing ones) produce empty
  // pieces unless absl::SkipEmpty() is supplied.
  using absl::ByAsciiWhitespace;
  using absl::SkipEmpty;
  std::vector<absl::string_view> results;

  // A trailing newline leaves a trailing empty piece...
  results = absl::StrSplit("aaaa\n", ByAsciiWhitespace());
  EXPECT_THAT(results, ElementsAre("aaaa", ""));

  // ...which SkipEmpty() removes.
  results = absl::StrSplit("aaaa\n", ByAsciiWhitespace(), SkipEmpty());
  EXPECT_THAT(results, ElementsAre("aaaa"));

  // A lone space has an empty piece on each side.
  results = absl::StrSplit(" ", ByAsciiWhitespace());
  EXPECT_THAT(results, ElementsAre("", ""));

  results = absl::StrSplit(" ", ByAsciiWhitespace(), SkipEmpty());
  EXPECT_THAT(results, IsEmpty());

  // No whitespace at all: the whole input is the only piece.
  results = absl::StrSplit("a", ByAsciiWhitespace());
  EXPECT_THAT(results, ElementsAre("a"));

  // Empty input yields a single empty piece, or nothing with SkipEmpty().
  results = absl::StrSplit("", ByAsciiWhitespace());
  EXPECT_THAT(results, ElementsAre(""));

  results = absl::StrSplit("", ByAsciiWhitespace(), SkipEmpty());
  EXPECT_THAT(results, IsEmpty());

  // Mixed delimiters. The run "\n" + two spaces between "c" and "d" consists
  // of three delimiters and therefore contributes the two consecutive empty
  // pieces in the expectation; the final "\n" contributes the trailing one.
  results = absl::StrSplit("a b\tc\n  d\n", ByAsciiWhitespace());
  EXPECT_THAT(results, ElementsAre("a", "b", "c", "", "", "d", ""));

  // With SkipEmpty() only the non-empty pieces remain.
  results = absl::StrSplit("a b\tc\n d \n", ByAsciiWhitespace(), SkipEmpty());
  EXPECT_THAT(results, ElementsAre("a", "b", "c", "d"));

  // Tab, newline, vertical tab, form feed, carriage return and space are all
  // treated as delimiters.
  results = absl::StrSplit("a\t\n\v\f\r b", ByAsciiWhitespace(), SkipEmpty());
  EXPECT_THAT(results, ElementsAre("a", "b"));
}
965
966 //
967 // Tests for ByLength
968 //
969
TEST(Delimiter, ByLength) {
  // A delimiter constructed with a length of 4.
  const absl::ByLength every_four(4);

  // Found: for texts longer than four characters the match is at position 4.
  EXPECT_TRUE(IsFoundAt("abcde", every_four, 4));
  EXPECT_TRUE(IsFoundAt("abcdefghijklmnopqrstuvwxyz", every_four, 4));
  EXPECT_TRUE(IsFoundAt("a b,c\nd", every_four, 4));

  // Not found: texts of four characters or fewer.
  EXPECT_FALSE(IsFoundAt("", every_four, 0));
  EXPECT_FALSE(IsFoundAt("a", every_four, 0));
  EXPECT_FALSE(IsFoundAt("ab", every_four, 0));
  EXPECT_FALSE(IsFoundAt("abc", every_four, 0));
  EXPECT_FALSE(IsFoundAt("abcd", every_four, 0));
}
986
// Splits one very large string whose only delimiter is its last character and
// verifies that exactly two pieces come back: everything before the delimiter
// and an empty tail. The input is 2^31 + 1 bytes, or 2^26 + 1 bytes when a
// sanitizer is enabled (presumably to keep the run affordable under
// instrumentation). The body only runs when size_t is wider than 4 bytes.
TEST(Split, WorksWithLargeStrings) {
#if defined(ABSL_HAVE_ADDRESS_SANITIZER) || \
    defined(ABSL_HAVE_MEMORY_SANITIZER) || defined(ABSL_HAVE_THREAD_SANITIZER)
  constexpr size_t kSize = (uint32_t{1} << 26) + 1;  // 64M + 1 byte
#else
  constexpr size_t kSize = (uint32_t{1} << 31) + 1;  // 2G + 1 byte
#endif
  if (sizeof(size_t) > 4) {
    std::string s(kSize, 'x');
    s.back() = '-';
    std::vector<absl::string_view> v = absl::StrSplit(s, '-');
    // Fatal on purpose: v[0] and v[1] are indexed below, which would be out of
    // bounds if the split produced fewer than two pieces.
    ASSERT_EQ(2u, v.size());
    // The first element holds every 'x' that precedes the trailing '-'
    // (kSize - 1 characters). Checking the length catches truncation of the
    // piece; it is also fatal because the element accesses below rely on it.
    ASSERT_EQ(kSize - 1, v[0].size());
    // Only spot-check the contents: testing::StartsWith is too slow with a
    // string this large.
    EXPECT_EQ('x', v[0][0]);
    EXPECT_EQ('x', v[0][1]);
    EXPECT_EQ('x', v[0][3]);
    EXPECT_EQ("", v[1]);
  }
}
1007
// Spot-checks four absl::strings_internal traits: each is expected to be false
// for plain `int` and true for one representative type.
TEST(SplitInternalTest, TypeTraits) {
  namespace si = absl::strings_internal;
  // Aliases keep template commas out of the assertion macro arguments.
  using IntMap = std::map<int, int>;
  using IntInitList = std::initializer_list<int>;

  // HasMappedType
  EXPECT_FALSE(si::HasMappedType<int>::value);
  EXPECT_TRUE(si::HasMappedType<IntMap>::value);

  // HasValueType
  EXPECT_FALSE(si::HasValueType<int>::value);
  EXPECT_TRUE(si::HasValueType<IntMap>::value);

  // HasConstIterator
  EXPECT_FALSE(si::HasConstIterator<int>::value);
  EXPECT_TRUE(si::HasConstIterator<IntMap>::value);

  // IsInitializerList
  EXPECT_FALSE(si::IsInitializerList<int>::value);
  EXPECT_TRUE(si::IsInitializerList<IntInitList>::value);
}
1022
1023 } // namespace
1024