1 // Copyright 2006-2008 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "base/strings/string_tokenizer.h"
6
7 #include "testing/gtest/include/gtest/gtest.h"
8
9 using std::string;
10
11 namespace base {
12
13 namespace {
14
// Splits a space-separated sentence and verifies every word in order, plus
// the delimiter state reported before the first and after the last token.
TEST(StringTokenizerTest, Simple) {
  std::string sentence = "this is a test";
  StringTokenizer tokenizer(sentence, " ");

  // Before any token has been returned, the start of the string is treated
  // as a delimiter.
  EXPECT_TRUE(tokenizer.token_is_delim());

  const char* const kWords[] = {"this", "is", "a", "test"};
  for (const char* word : kWords) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_FALSE(tokenizer.token_is_delim());
    EXPECT_EQ(word, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
  // After the last token, the end of the string is treated as a delimiter.
  EXPECT_TRUE(tokenizer.token_is_delim());
}
42
// Tokenizes the same input twice with a single tokenizer, calling Reset() at
// the end of each pass, and expects the same results on both passes.
TEST(StringTokenizerTest, Reset) {
  std::string sentence = "this is a test";
  StringTokenizer tokenizer(sentence, " ");
  const char* const kWords[] = {"this", "is", "a", "test"};

  int passes_left = 2;
  while (passes_left-- > 0) {
    // Every pass is expected to begin in the "delimiter" state.
    EXPECT_TRUE(tokenizer.token_is_delim());

    for (const char* word : kWords) {
      EXPECT_TRUE(tokenizer.GetNext());
      EXPECT_FALSE(tokenizer.token_is_delim());
      EXPECT_EQ(word, tokenizer.token());
    }

    // Running off the end reports a delimiter again.
    EXPECT_FALSE(tokenizer.GetNext());
    EXPECT_TRUE(tokenizer.token_is_delim());

    tokenizer.Reset();
  }
}
72
// With RETURN_DELIMS set, each separating space is expected to come back as
// its own token, flagged as a delimiter, interleaved with the words.
TEST(StringTokenizerTest, RetDelims) {
  struct Expected {
    const char* text;
    bool is_delim;
  };
  const Expected kSequence[] = {
      {"this", false}, {" ", true}, {"is", false},   {" ", true},
      {"a", false},    {" ", true}, {"test", false},
  };

  std::string sentence = "this is a test";
  StringTokenizer tokenizer(sentence, " ");
  tokenizer.set_options(StringTokenizer::RETURN_DELIMS);
  EXPECT_TRUE(tokenizer.token_is_delim());

  for (const Expected& expected : kSequence) {
    EXPECT_TRUE(tokenizer.GetNext());
    if (expected.is_delim) {
      EXPECT_TRUE(tokenizer.token_is_delim());
    } else {
      EXPECT_FALSE(tokenizer.token_is_delim());
    }
    EXPECT_EQ(expected.text, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
  EXPECT_TRUE(tokenizer.token_is_delim());
}
110
// With RETURN_EMPTY_TOKENS set, adjacent commas are expected to produce empty
// tokens, while a comma inside single quotes stays part of its token.
TEST(StringTokenizerTest, RetEmptyTokens) {
  std::string list = "foo='a, b',,bar,,baz,quux";
  StringTokenizer tokenizer(list, ",");
  tokenizer.set_options(StringTokenizer::RETURN_EMPTY_TOKENS);
  tokenizer.set_quote_chars("'");

  const char* const kFields[] = {"foo='a, b'", "", "bar", "", "baz", "quux"};
  for (const char* field : kFields) {
    // Fatal on failure: there is no point inspecting a token that was never
    // produced.
    ASSERT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(field, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
137
// A leading delimiter is expected to yield an empty first token when
// RETURN_EMPTY_TOKENS is set.
TEST(StringTokenizerTest, RetEmptyTokens_AtStart) {
  std::string list = ",bar";
  StringTokenizer tokenizer(list, ",");
  tokenizer.set_options(StringTokenizer::RETURN_EMPTY_TOKENS);
  tokenizer.set_quote_chars("'");

  const char* const kFields[] = {"", "bar"};
  for (const char* field : kFields) {
    ASSERT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(field, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
152
// A trailing delimiter is expected to yield an empty final token when
// RETURN_EMPTY_TOKENS is set.
TEST(StringTokenizerTest, RetEmptyTokens_AtEnd) {
  std::string list = "bar,";
  StringTokenizer tokenizer(list, ",");
  tokenizer.set_options(StringTokenizer::RETURN_EMPTY_TOKENS);
  tokenizer.set_quote_chars("'");

  const char* const kFields[] = {"bar", ""};
  for (const char* field : kFields) {
    ASSERT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(field, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
167
// An input consisting of a single delimiter is expected to yield two empty
// tokens (one on each side) when RETURN_EMPTY_TOKENS is set.
TEST(StringTokenizerTest, RetEmptyTokens_Both) {
  std::string list = ",";
  StringTokenizer tokenizer(list, ",");
  tokenizer.set_options(StringTokenizer::RETURN_EMPTY_TOKENS);
  tokenizer.set_quote_chars("'");

  // First pass checks the token before the comma, second the one after it.
  for (int side = 0; side < 2; ++side) {
    ASSERT_TRUE(tokenizer.GetNext());
    EXPECT_EQ("", tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
182
// An empty input is expected to yield exactly one empty token when
// RETURN_EMPTY_TOKENS is set.
TEST(StringTokenizerTest, RetEmptyTokens_Empty) {
  std::string nothing = "";
  StringTokenizer tokenizer(nothing, ",");
  tokenizer.set_options(StringTokenizer::RETURN_EMPTY_TOKENS);

  // The single (empty) token.
  ASSERT_TRUE(tokenizer.GetNext());
  EXPECT_EQ("", tokenizer.token());

  // Nothing further.
  EXPECT_FALSE(tokenizer.GetNext());
}
193
// With both RETURN_DELIMS and RETURN_EMPTY_TOKENS set, the expected stream
// contains every comma as its own token and an empty token between adjacent
// commas; the comma inside single quotes stays inside its token.
TEST(StringTokenizerTest, RetDelimsAndEmptyTokens) {
  std::string list = "foo='a, b',,bar,,baz,quux";
  StringTokenizer tokenizer(list, ",");
  const int kOptions =
      StringTokenizer::RETURN_DELIMS | StringTokenizer::RETURN_EMPTY_TOKENS;
  tokenizer.set_options(kOptions);
  tokenizer.set_quote_chars("'");

  const char* const kStream[] = {
      "foo='a, b'", ",", "", ",", "bar", ",", "", ",", "baz", ",", "quux",
  };
  for (const char* piece : kStream) {
    ASSERT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(piece, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
236
// Uses a delimiter set of several characters (colon, space, comma, hyphen);
// only the words between them are expected as tokens.
TEST(StringTokenizerTest, ManyDelims) {
  std::string sentence = "this: is, a-test";
  StringTokenizer tokenizer(sentence, ": ,-");

  // Advances the tokenizer and checks the token it lands on.
  auto expect_next = [&tokenizer](const char* word) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(word, tokenizer.token());
  };

  expect_next("this");
  expect_next("is");
  expect_next("a");
  expect_next("test");

  EXPECT_FALSE(tokenizer.GetNext());
}
255
// Tokenizes an HTTP-style header line with RETURN_DELIMS set, checking both
// the text of every token and whether it is reported as a delimiter.
TEST(StringTokenizerTest, ParseHeader) {
  struct Expected {
    const char* text;
    bool is_delim;
  };
  const Expected kSequence[] = {
      {"Content-Type", false},
      {":", true},
      {" ", true},
      {"text/html", false},
      {" ", true},
      {";", true},
      {" ", true},
      {"charset", false},
      {"=", true},
      {"UTF-8", false},
  };

  std::string header = "Content-Type: text/html ; charset=UTF-8";
  StringTokenizer tokenizer(header, ": ;=");
  tokenizer.set_options(StringTokenizer::RETURN_DELIMS);
  EXPECT_TRUE(tokenizer.token_is_delim());

  for (const Expected& expected : kSequence) {
    EXPECT_TRUE(tokenizer.GetNext());
    if (expected.is_delim) {
      EXPECT_TRUE(tokenizer.token_is_delim());
    } else {
      EXPECT_FALSE(tokenizer.token_is_delim());
    }
    EXPECT_EQ(expected.text, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
  EXPECT_TRUE(tokenizer.token_is_delim());
}
305
// With the single quote registered as a quote character, a quoted phrase that
// contains the delimiter is expected as one token, quotes included.
TEST(StringTokenizerTest, ParseQuotedString) {
  std::string text = "foo bar 'hello world' baz";
  StringTokenizer tokenizer(text, " ");
  tokenizer.set_quote_chars("'");

  const char* const kTokens[] = {"foo", "bar", "'hello world'", "baz"};
  for (const char* token : kTokens) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(token, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
325
// An opening quote with no closing quote: everything from the quote to the
// end of the input is expected as a single token.
TEST(StringTokenizerTest, ParseQuotedString_Malformed) {
  std::string text = "bar 'hello wo";
  StringTokenizer tokenizer(text, " ");
  tokenizer.set_quote_chars("'");

  const char* const kTokens[] = {"bar", "'hello wo"};
  for (const char* token : kTokens) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(token, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
339
// Registers both single and double quotes as quote characters. Double quotes
// nested inside a single-quoted run are expected to stay within that token,
// and a lone trailing double quote stays attached to the last token.
TEST(StringTokenizerTest, ParseQuotedString_Multiple) {
  std::string text = "bar 'hel\"lo\" wo' baz\"";
  StringTokenizer tokenizer(text, " ");
  tokenizer.set_quote_chars("'\"");

  const char* const kTokens[] = {"bar", "'hel\"lo\" wo'", "baz\""};
  for (const char* token : kTokens) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(token, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
356
// A backslash-escaped quote inside a quoted run is expected not to end the
// run: the whole quoted phrase, backslash included, comes back as one token.
TEST(StringTokenizerTest, ParseQuotedString_EscapedQuotes) {
  std::string text = "foo 'don\\'t do that'";
  StringTokenizer tokenizer(text, " ");
  tokenizer.set_quote_chars("'");

  const char* const kTokens[] = {"foo", "'don\\'t do that'"};
  for (const char* token : kTokens) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(token, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
370
// Comma and space are both delimiters here. The ", " inside the quoted value
// is expected to be kept within its token, while the ", " outside the quotes
// separates the two tokens without producing an empty one.
TEST(StringTokenizerTest, ParseQuotedString_EscapedQuotes2) {
  std::string text = "foo='a, b', bar";
  StringTokenizer tokenizer(text, ", ");
  tokenizer.set_quote_chars("'");

  // Advances the tokenizer and checks the token it lands on.
  auto expect_next = [&tokenizer](const char* token) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(token, tokenizer.token());
  };

  expect_next("foo='a, b'");
  expect_next("bar");

  EXPECT_FALSE(tokenizer.GetNext());
}
384
// Constructs the tokenizer with WhitespacePolicy::kSkipOver. The tabs, spaces
// and CR/LF sequences surrounding each comma-separated item are expected to
// be absent from the returned tokens.
TEST(StringTokenizerTest, ParseWithWhitespace_NoQuotes) {
  std::string padded = "\t\t\t foo=a,\r\n b,\r\n\t\t\t bar\t ";
  StringTokenizer tokenizer(padded, ",",
                            StringTokenizer::WhitespacePolicy::kSkipOver);

  const char* const kItems[] = {"foo=a", "b", "bar"};
  for (const char* item : kItems) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(item, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
400
// Combines WhitespacePolicy::kSkipOver with a quote character. Surrounding
// whitespace is expected to be dropped from the tokens, while the comma and
// space inside the quoted value are kept.
TEST(StringTokenizerTest, ParseWithWhitespace_Quotes) {
  std::string padded = "\t\t\t foo='a, b',\t\t\t bar\t ";
  StringTokenizer tokenizer(padded, ",",
                            StringTokenizer::WhitespacePolicy::kSkipOver);
  tokenizer.set_quote_chars("'");

  const char* const kItems[] = {"foo='a, b'", "bar"};
  for (const char* item : kItems) {
    EXPECT_TRUE(tokenizer.GetNext());
    EXPECT_EQ(item, tokenizer.token());
  }

  EXPECT_FALSE(tokenizer.GetNext());
}
414
415 } // namespace
416
417 } // namespace base
418