xref: /aosp_15_r20/external/cronet/base/strings/string_tokenizer_unittest.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2006-2008 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4 
5 #include "base/strings/string_tokenizer.h"
6 
7 #include "testing/gtest/include/gtest/gtest.h"
8 
9 using std::string;
10 
11 namespace base {
12 
13 namespace {
14 
// Splits a space-separated sentence with default options and checks every
// token, along with the delimiter state reported before the first token and
// after the last one.
TEST(StringTokenizerTest, Simple) {
  const char* const kWords[] = {"this", "is", "a", "test"};

  string sentence = "this is a test";
  StringTokenizer tokenizer(sentence, " ");

  // Before any token has been returned, the start of the string is treated
  // as a delimiter.
  EXPECT_TRUE(tokenizer.token_is_delim());

  for (const char* word : kWords) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_FALSE(tokenizer.token_is_delim());
    EXPECT_EQ(word, tokenizer.token());
  }

  // Once the tokens are exhausted, the end of the string is treated as a
  // delimiter as well.
  EXPECT_FALSE(tokenizer.GetNext());
  EXPECT_TRUE(tokenizer.token_is_delim());
}
42 
// Walks the same input twice with one tokenizer: the pass that follows
// Reset() must report exactly the same tokens and delimiter states as the
// first pass.
TEST(StringTokenizerTest, Reset) {
  const char* const kWords[] = {"this", "is", "a", "test"};

  string sentence = "this is a test";
  StringTokenizer tokenizer(sentence, " ");

  for (int pass = 0; pass != 2; ++pass) {
    // At the start of each pass no token has been produced yet.
    EXPECT_TRUE(tokenizer.token_is_delim());

    for (const char* word : kWords) {
      EXPECT_TRUE(tokenizer.GetNext());
      EXPECT_FALSE(tokenizer.token_is_delim());
      EXPECT_EQ(word, tokenizer.token());
    }

    EXPECT_FALSE(tokenizer.GetNext());
    EXPECT_TRUE(tokenizer.token_is_delim());

    // Rewind so the next pass starts over from the beginning.
    tokenizer.Reset();
  }
}
72 
// With RETURN_DELIMS set, each separating space is expected to come back as
// its own token, flagged as a delimiter, in between the word tokens.
TEST(StringTokenizerTest, RetDelims) {
  struct Expected {
    const char* text;
    bool is_delim;
  };
  const Expected kSequence[] = {
      {"this", false}, {" ", true}, {"is", false},   {" ", true},
      {"a", false},    {" ", true}, {"test", false},
  };

  string sentence = "this is a test";
  StringTokenizer tokenizer(sentence, " ");
  tokenizer.set_options(StringTokenizer::RETURN_DELIMS);
  EXPECT_TRUE(tokenizer.token_is_delim());

  for (const Expected& step : kSequence) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(step.is_delim, tokenizer.token_is_delim());
    EXPECT_EQ(step.text, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
  EXPECT_TRUE(tokenizer.token_is_delim());
}
110 
// With RETURN_EMPTY_TOKENS set, two adjacent commas are expected to yield an
// empty token between them, while the comma inside the single-quoted section
// does not split the first token.
TEST(StringTokenizerTest, RetEmptyTokens) {
  const char* const kTokens[] = {"foo='a, b'", "", "bar", "", "baz", "quux"};

  string list = "foo='a, b',,bar,,baz,quux";
  StringTokenizer tokenizer(list, ",");
  tokenizer.set_options(StringTokenizer::RETURN_EMPTY_TOKENS);
  tokenizer.set_quote_chars("'");

  for (const char* want : kTokens) {
    // Fatal on failure: the token is meaningless if GetNext() returned false.
    ASSERT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(want, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
137 
// A leading delimiter is expected to produce an empty first token when
// RETURN_EMPTY_TOKENS is set.
TEST(StringTokenizerTest, RetEmptyTokens_AtStart) {
  const char* const kTokens[] = {"", "bar"};

  string list = ",bar";
  StringTokenizer tokenizer(list, ",");
  tokenizer.set_options(StringTokenizer::RETURN_EMPTY_TOKENS);
  tokenizer.set_quote_chars("'");

  for (const char* want : kTokens) {
    ASSERT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(want, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
152 
// A trailing delimiter is expected to produce an empty final token when
// RETURN_EMPTY_TOKENS is set.
TEST(StringTokenizerTest, RetEmptyTokens_AtEnd) {
  const char* const kTokens[] = {"bar", ""};

  string list = "bar,";
  StringTokenizer tokenizer(list, ",");
  tokenizer.set_options(StringTokenizer::RETURN_EMPTY_TOKENS);
  tokenizer.set_quote_chars("'");

  for (const char* want : kTokens) {
    ASSERT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(want, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
167 
// An input consisting of a single delimiter is expected to produce two empty
// tokens (one on each side of it) when RETURN_EMPTY_TOKENS is set.
TEST(StringTokenizerTest, RetEmptyTokens_Both) {
  string list = ",";
  StringTokenizer tokenizer(list, ",");
  tokenizer.set_options(StringTokenizer::RETURN_EMPTY_TOKENS);
  tokenizer.set_quote_chars("'");

  for (int side = 0; side < 2; ++side) {
    ASSERT_TRUE(tokenizer.GetNext());
    EXPECT_EQ("", tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
182 
// An empty input is expected to produce exactly one empty token when
// RETURN_EMPTY_TOKENS is set.
TEST(StringTokenizerTest, RetEmptyTokens_Empty) {
  string empty_input = "";
  StringTokenizer tokenizer(empty_input, ",");
  tokenizer.set_options(StringTokenizer::RETURN_EMPTY_TOKENS);

  // The single empty token.
  const bool got_first = tokenizer.GetNext();
  ASSERT_TRUE(got_first);
  EXPECT_EQ("", tokenizer.token());

  // Nothing is left after it.
  const bool got_second = tokenizer.GetNext();
  EXPECT_FALSE(got_second);
}
193 
// Combines RETURN_DELIMS and RETURN_EMPTY_TOKENS: every comma outside the
// quoted section is expected to come back as a token, and each pair of
// adjacent commas is expected to have an empty token between them.
TEST(StringTokenizerTest, RetDelimsAndEmptyTokens) {
  const char* const kTokens[] = {
      "foo='a, b'", ",", "", ",", "bar", ",", "", ",", "baz", ",", "quux",
  };

  string list = "foo='a, b',,bar,,baz,quux";
  StringTokenizer tokenizer(list, ",");
  tokenizer.set_options(StringTokenizer::RETURN_DELIMS |
                        StringTokenizer::RETURN_EMPTY_TOKENS);
  tokenizer.set_quote_chars("'");

  for (const char* want : kTokens) {
    // Fatal on failure: the token is meaningless if GetNext() returned false.
    ASSERT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(want, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
236 
// Uses several delimiter characters at once (colon, space, comma, hyphen);
// runs of consecutive delimiters are expected to separate tokens without
// producing empty ones under the default options.
TEST(StringTokenizerTest, ManyDelims) {
  const char* const kWords[] = {"this", "is", "a", "test"};

  string sentence = "this: is, a-test";
  StringTokenizer tokenizer(sentence, ": ,-");

  for (const char* word : kWords) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(word, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
255 
// Tokenizes an HTTP-style header line with RETURN_DELIMS set. Each delimiter
// character is expected to be returned as its own one-character token, so
// consecutive delimiters such as ": " or " ; " come back one at a time.
TEST(StringTokenizerTest, ParseHeader) {
  struct Expected {
    const char* text;
    bool is_delim;
  };
  const Expected kSequence[] = {
      {"Content-Type", false},
      {":", true},
      {" ", true},
      {"text/html", false},
      {" ", true},
      {";", true},
      {" ", true},
      {"charset", false},
      {"=", true},
      {"UTF-8", false},
  };

  string header = "Content-Type: text/html ; charset=UTF-8";
  StringTokenizer tokenizer(header, ": ;=");
  tokenizer.set_options(StringTokenizer::RETURN_DELIMS);
  EXPECT_TRUE(tokenizer.token_is_delim());

  for (const Expected& step : kSequence) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(step.is_delim, tokenizer.token_is_delim());
    EXPECT_EQ(step.text, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
  EXPECT_TRUE(tokenizer.token_is_delim());
}
305 
// With a single quote registered as a quote character, the space inside
// 'hello world' is not expected to split the token, and the quotes themselves
// are expected to stay in the returned token.
TEST(StringTokenizerTest, ParseQuotedString) {
  const char* const kTokens[] = {"foo", "bar", "'hello world'", "baz"};

  string text = "foo bar 'hello world' baz";
  StringTokenizer tokenizer(text, " ");
  tokenizer.set_quote_chars("'");

  for (const char* want : kTokens) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(want, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
325 
// The input opens a quote that is never closed. Everything from the opening
// quote to the end of the string is expected to come back as a single token.
TEST(StringTokenizerTest, ParseQuotedString_Malformed) {
  const char* const kTokens[] = {"bar", "'hello wo"};

  string text = "bar 'hello wo";
  StringTokenizer tokenizer(text, " ");
  tokenizer.set_quote_chars("'");

  for (const char* want : kTokens) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(want, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
339 
// Registers both single and double quotes as quote characters. Double quotes
// nested inside a single-quoted section are expected to stay part of that
// token, and the unmatched trailing double quote is expected to stay attached
// to the last token.
TEST(StringTokenizerTest, ParseQuotedString_Multiple) {
  const char* const kTokens[] = {"bar", "'hel\"lo\" wo'", "baz\""};

  string text = "bar 'hel\"lo\" wo' baz\"";
  StringTokenizer tokenizer(text, " ");
  tokenizer.set_quote_chars("'\"");

  for (const char* want : kTokens) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(want, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
356 
// A backslash-escaped quote inside a quoted section is not expected to close
// it: the whole quoted phrase, backslash included, comes back as one token.
TEST(StringTokenizerTest, ParseQuotedString_EscapedQuotes) {
  const char* const kTokens[] = {"foo", "'don\\'t do that'"};

  string text = "foo 'don\\'t do that'";
  StringTokenizer tokenizer(text, " ");
  tokenizer.set_quote_chars("'");

  for (const char* want : kTokens) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(want, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
370 
// Note: despite the test name, this input contains no escaped quotes. It
// checks that a quoted section starting mid-token protects the comma and
// space inside it, while the same characters outside the quotes still act as
// delimiters.
TEST(StringTokenizerTest, ParseQuotedString_EscapedQuotes2) {
  const char* const kTokens[] = {"foo='a, b'", "bar"};

  string text = "foo='a, b', bar";
  StringTokenizer tokenizer(text, ", ");
  tokenizer.set_quote_chars("'");

  for (const char* want : kTokens) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(want, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
384 
// Constructs the tokenizer with WhitespacePolicy::kSkipOver. The tabs,
// spaces, and CR/LF sequences surrounding each comma-separated item are
// expected to be absent from the returned tokens.
TEST(StringTokenizerTest, ParseWithWhitespace_NoQuotes) {
  const char* const kTokens[] = {"foo=a", "b", "bar"};

  string padded = "\t\t\t     foo=a,\r\n b,\r\n\t\t\t      bar\t ";
  StringTokenizer tokenizer(padded, ",",
                            StringTokenizer::WhitespacePolicy::kSkipOver);

  for (const char* want : kTokens) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(want, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
400 
// Combines WhitespacePolicy::kSkipOver with a quote character. Whitespace
// around the items is expected to be dropped, while the comma and space
// inside the single-quoted section remain part of the first token.
TEST(StringTokenizerTest, ParseWithWhitespace_Quotes) {
  const char* const kTokens[] = {"foo='a, b'", "bar"};

  string padded = "\t\t\t     foo='a, b',\t\t\t      bar\t ";
  StringTokenizer tokenizer(padded, ",",
                            StringTokenizer::WhitespacePolicy::kSkipOver);
  tokenizer.set_quote_chars("'");

  for (const char* want : kTokens) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(want, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
414 
415 }  // namespace
416 
417 }  // namespace base
418