1 // -*- coding: utf-8 -*-
2 // Copyright 2002-2009 The RE2 Authors. All Rights Reserved.
3 // Use of this source code is governed by a BSD-style
4 // license that can be found in the LICENSE file.
5
6 // TODO: Test extractions for PartialMatch/Consume
7
8 #include <errno.h>
9 #include <stddef.h>
10 #include <stdint.h>
11 #include <string.h>
12 #include <map>
13 #include <string>
14 #include <utility>
15 #include <vector>
16 #if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
17 #include <sys/mman.h>
18 #include <unistd.h> /* for sysconf */
19 #endif
20
21 #include "util/test.h"
22 #include "util/logging.h"
23 #include "util/strutil.h"
24 #include "re2/re2.h"
25 #include "re2/regexp.h"
26
27 namespace re2 {
28
// Parses hexadecimal text into every standard integer width and compares the
// result with the value the compiler assigns to the same literal.
TEST(RE2, HexTests) {
// ASSERT_HEX(T, digits): `digits` is a run of hex digits with an optional
// integer suffix. The bare digits are parsed through RE2::Hex and the
// "0x"-prefixed spelling through RE2::CRadix; both must equal 0x<digits>.
#define ASSERT_HEX(T, digits)                                           \
  do {                                                                  \
    T parsed;                                                           \
    ASSERT_TRUE(RE2::FullMatch(#digits, "([0-9a-fA-F]+)[uUlL]*",        \
                               RE2::Hex(&parsed)));                     \
    ASSERT_EQ(parsed, 0x##digits);                                      \
    ASSERT_TRUE(RE2::FullMatch("0x" #digits, "([0-9a-fA-FxX]+)[uUlL]*", \
                               RE2::CRadix(&parsed)));                  \
    ASSERT_EQ(parsed, 0x##digits);                                      \
  } while (0)

  // 16-bit
  ASSERT_HEX(short, 2bad);
  ASSERT_HEX(unsigned short, 2badU);
  // int
  ASSERT_HEX(int, dead);
  ASSERT_HEX(unsigned int, deadU);
  // long
  ASSERT_HEX(long, 7eadbeefL);
  ASSERT_HEX(unsigned long, deadbeefUL);
  // long long
  ASSERT_HEX(long long, 12345678deadbeefLL);
  ASSERT_HEX(unsigned long long, cafebabedeadbeefULL);

#undef ASSERT_HEX
}
52
// Parses octal text into every standard integer width and compares the
// result with the value the compiler assigns to the same literal.
TEST(RE2, OctalTests) {
// ASSERT_OCTAL(T, digits): `digits` is a run of octal digits with an optional
// integer suffix. The bare digits are parsed through RE2::Octal and the
// "0"-prefixed spelling through RE2::CRadix; both must equal 0<digits>.
#define ASSERT_OCTAL(T, digits)                                        \
  do {                                                                 \
    T parsed;                                                          \
    ASSERT_TRUE(                                                       \
        RE2::FullMatch(#digits, "([0-7]+)[uUlL]*", RE2::Octal(&parsed))); \
    ASSERT_EQ(parsed, 0##digits);                                      \
    ASSERT_TRUE(RE2::FullMatch("0" #digits, "([0-9a-fA-FxX]+)[uUlL]*", \
                               RE2::CRadix(&parsed)));                 \
    ASSERT_EQ(parsed, 0##digits);                                      \
  } while (0)

  // 16-bit
  ASSERT_OCTAL(short, 77777);
  ASSERT_OCTAL(unsigned short, 177777U);
  // int
  ASSERT_OCTAL(int, 17777777777);
  ASSERT_OCTAL(unsigned int, 37777777777U);
  // long
  ASSERT_OCTAL(long, 17777777777L);
  ASSERT_OCTAL(unsigned long, 37777777777UL);
  // long long
  ASSERT_OCTAL(long long, 777777777777777777777LL);
  ASSERT_OCTAL(unsigned long long, 1777777777777777777777ULL);

#undef ASSERT_OCTAL
}
75
// Parses decimal text into every standard integer width and compares the
// result with the value the compiler assigns to the same literal.
TEST(RE2, DecimalTests) {
// ASSERT_DECIMAL(T, literal): the stringized literal is parsed twice, once
// through the default (base 10) conversion and once through RE2::CRadix;
// both results must equal the literal itself.
#define ASSERT_DECIMAL(T, literal)                                    \
  do {                                                                \
    T parsed;                                                         \
    ASSERT_TRUE(RE2::FullMatch(#literal, "(-?[0-9]+)[uUlL]*", &parsed)); \
    ASSERT_EQ(parsed, literal);                                       \
    ASSERT_TRUE(RE2::FullMatch(#literal, "(-?[0-9a-fA-FxX]+)[uUlL]*", \
                               RE2::CRadix(&parsed)));                \
    ASSERT_EQ(parsed, literal);                                       \
  } while (0)

  // 16-bit
  ASSERT_DECIMAL(short, -1);
  ASSERT_DECIMAL(unsigned short, 9999);
  // int
  ASSERT_DECIMAL(int, -1000);
  ASSERT_DECIMAL(unsigned int, 12345U);
  // long
  ASSERT_DECIMAL(long, -10000000L);
  ASSERT_DECIMAL(unsigned long, 3083324652U);
  // long long
  ASSERT_DECIMAL(long long, -100000000000000LL);
  ASSERT_DECIMAL(unsigned long long, 1234567890987654321ULL);

#undef ASSERT_DECIMAL
}
98
// Table-driven check of RE2::Replace (first match only) and
// RE2::GlobalReplace (every match, returning the replacement count).
TEST(RE2, Replace) {
  struct ReplaceTest {
    const char* regexp;    // pattern to search for
    const char* rewrite;   // rewrite string (\0 = whole match, \N = group N)
    const char* original;  // subject text before any replacement
    const char* single;    // expected text after RE2::Replace
    const char* global;    // expected text after RE2::GlobalReplace
    int greplace_count;    // expected return value of RE2::GlobalReplace
  };
  static const ReplaceTest tests[] = {
    { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
      "\\2\\1ay",
      "the quick brown fox jumps over the lazy dogs.",
      "ethay quick brown fox jumps over the lazy dogs.",
      "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
      9 },
    // Four \w+ runs (abcd, efghi, google, com), hence a global count of 4.
    // NOTE(review): the subject strings of this row had been mangled into an
    // e-mail obfuscation placeholder; they are reconstructed here so that the
    // row is consistent with its pattern, rewrite and count.
    { "\\w+",
      "\\0-NOSPAM",
      "abcd.efghi@google.com",
      "abcd-NOSPAM.efghi@google.com",
      "abcd-NOSPAM.efghi-NOSPAM@google-NOSPAM.com-NOSPAM",
      4 },
    { "^",
      "(START)",
      "foo",
      "(START)foo",
      "(START)foo",
      1 },
    { "^",
      "(START)",
      "",
      "(START)",
      "(START)",
      1 },
    { "$",
      "(END)",
      "",
      "(END)",
      "(END)",
      1 },
    { "b",
      "bb",
      "ababababab",
      "abbabababab",
      "abbabbabbabbabb",
      5 },
    { "b",
      "bb",
      "bbbbbb",
      "bbbbbbb",
      "bbbbbbbbbbbb",
      6 },
    { "b+",
      "bb",
      "bbbbbb",
      "bb",
      "bb",
      1 },
    { "b*",
      "bb",
      "bbbbbb",
      "bb",
      "bb",
      1 },
    { "b*",
      "bb",
      "aaaaa",
      "bbaaaaa",
      "bbabbabbabbabbabb",
      6 },
    // Check newline handling
    { "a.*a",
      "(\\0)",
      "aba\naba",
      "(aba)\naba",
      "(aba)\n(aba)",
      2 },
  };

  // The array bound drives the loop, so no NULL sentinel row is needed.
  for (const ReplaceTest& t : tests) {
    std::string one(t.original);
    ASSERT_TRUE(RE2::Replace(&one, t.regexp, t.rewrite));
    ASSERT_EQ(one, t.single);

    std::string all(t.original);
    ASSERT_EQ(RE2::GlobalReplace(&all, t.regexp, t.rewrite), t.greplace_count)
        << "Got: " << all;
    ASSERT_EQ(all, t.global);
  }
}
189
TestCheckRewriteString(const char * regexp,const char * rewrite,bool expect_ok)190 static void TestCheckRewriteString(const char* regexp, const char* rewrite,
191 bool expect_ok) {
192 std::string error;
193 RE2 exp(regexp);
194 bool actual_ok = exp.CheckRewriteString(rewrite, &error);
195 EXPECT_EQ(expect_ok, actual_ok) << " for " << rewrite << " error: " << error;
196 }
197
// Runs TestCheckRewriteString over a table of (pattern, rewrite, verdict)
// rows, grouped by how many capturing groups the pattern has.
TEST(CheckRewriteString, all) {
  struct Case {
    const char* regexp;
    const char* rewrite;
    bool expect_ok;
  };
  static const Case kCases[] = {
    // No capturing groups.
    { "abc", "foo", true },
    { "abc", "foo\\", false },
    { "abc", "foo\\0bar", true },

    // One capturing group.
    { "a(b)c", "foo", true },
    { "a(b)c", "foo\\0bar", true },
    { "a(b)c", "foo\\1bar", true },
    { "a(b)c", "foo\\2bar", false },
    { "a(b)c", "f\\\\2o\\1o", true },

    // Two capturing groups.
    { "a(b)(c)", "foo\\12", true },
    { "a(b)(c)", "f\\2o\\1o", true },
    { "a(b)(c)", "f\\oo\\1", false },
  };

  for (const Case& c : kCases)
    TestCheckRewriteString(c.regexp, c.rewrite, c.expect_ok);
}
213
// Exercises RE2::Extract: a successful match writes the rewritten text to the
// output string, and a failed match leaves the output string untouched.
TEST(RE2, Extract) {
  std::string s;

  // The subject must contain "<user>@<host>." for the pattern to match;
  // the rewrite swaps the two captures around a '!'.
  // NOTE(review): the subject literal had been mangled into an e-mail
  // obfuscation placeholder without any '@'; it is reconstructed here from
  // the expected output.
  ASSERT_TRUE(RE2::Extract("boris@kremvax.ru", "(.*)@([^.]*)", "\\2!\\1", &s));
  ASSERT_EQ(s, "kremvax!boris");

  ASSERT_TRUE(RE2::Extract("foo", ".*", "'\\0'", &s));
  ASSERT_EQ(s, "'foo'");
  // check that false match doesn't overwrite
  ASSERT_FALSE(RE2::Extract("baz", "bar", "'\\0'", &s));
  ASSERT_EQ(s, "'foo'");
}
226
// A rewrite string that refers to group \2 must be rejected by Extract,
// Replace and GlobalReplace when the pattern only has one group.
TEST(RE2, MaxSubmatchTooLarge) {
  const char* const pattern = "f(o+)";
  const char* const rewrite = "\\1\\2";

  std::string text;
  ASSERT_FALSE(RE2::Extract("foo", pattern, rewrite, &text));

  text = "foo";
  ASSERT_FALSE(RE2::Replace(&text, pattern, rewrite));

  text = "foo";
  ASSERT_FALSE(RE2::GlobalReplace(&text, pattern, rewrite));
}
235
// RE2::Consume must match at the start of the remaining input and advance
// past each match; it fails once the input no longer starts with a word.
TEST(RE2, Consume) {
  // Matches a word, possibly preceded by whitespace.
  RE2 word_re("\\s*(\\w+)");

  std::string text(" aaa b!@#$@#$cccc");
  StringPiece remaining(text);
  std::string captured;

  ASSERT_TRUE(RE2::Consume(&remaining, word_re, &captured));
  ASSERT_EQ(captured, "aaa") << " input: " << remaining;

  ASSERT_TRUE(RE2::Consume(&remaining, word_re, &captured));
  ASSERT_EQ(captured, "b") << " input: " << remaining;

  ASSERT_FALSE(RE2::Consume(&remaining, word_re, &captured))
      << " input: " << remaining;
}
249
// Exercises RE2::ConsumeN with zero, one and two output arguments, consuming
// successive words from the same input.
TEST(RE2, ConsumeN) {
  const std::string s(" one two three 4");
  StringPiece input(s);

  RE2::Arg argv[2];
  const RE2::Arg* const args[2] = { &argv[0], &argv[1] };

  // 0 arg
  EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 0));  // Skips "one".

  // 1 arg
  std::string word;
  argv[0] = &word;
  EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)", args, 1));
  EXPECT_EQ("two", word);

  // Multi-args
  // Initialized because the EXPECT_* checks are non-fatal: if the match
  // fails, the comparison below still runs and must not read an
  // indeterminate value.
  int n = 0;
  argv[1] = &n;
  EXPECT_TRUE(RE2::ConsumeN(&input, "\\s*(\\w+)\\s*(\\d+)", args, 2));
  EXPECT_EQ("three", word);
  EXPECT_EQ(4, n);
}
273
// RE2::FindAndConsume searches forward for the next match (skipping
// non-matching text) and advances the input past it.
TEST(RE2, FindAndConsume) {
  RE2 word_re("(\\w+)");  // matches a word

  std::string text(" aaa b!@#$@#$cccc");
  StringPiece remaining(text);
  std::string captured;

  ASSERT_TRUE(RE2::FindAndConsume(&remaining, word_re, &captured));
  ASSERT_EQ(captured, "aaa");

  ASSERT_TRUE(RE2::FindAndConsume(&remaining, word_re, &captured));
  ASSERT_EQ(captured, "b");

  ASSERT_TRUE(RE2::FindAndConsume(&remaining, word_re, &captured));
  ASSERT_EQ(captured, "cccc");

  ASSERT_FALSE(RE2::FindAndConsume(&remaining, word_re, &captured));

  // Check that FindAndConsume works without any submatches.
  // Earlier version used uninitialized data for
  // length to consume.
  remaining = "aaa";
  ASSERT_TRUE(RE2::FindAndConsume(&remaining, "aaa"));
  ASSERT_EQ(remaining, "");
}
296
// Exercises RE2::FindAndConsumeN with zero, one and two output arguments,
// consuming successive words from the same input.
TEST(RE2, FindAndConsumeN) {
  const std::string s(" one two three 4");
  StringPiece input(s);

  RE2::Arg argv[2];
  const RE2::Arg* const args[2] = { &argv[0], &argv[1] };

  // 0 arg
  EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 0));  // Skips "one".

  // 1 arg
  std::string word;
  argv[0] = &word;
  EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)", args, 1));
  EXPECT_EQ("two", word);

  // Multi-args
  // Initialized because the EXPECT_* checks are non-fatal: if the match
  // fails, the comparison below still runs and must not read an
  // indeterminate value.
  int n = 0;
  argv[1] = &n;
  EXPECT_TRUE(RE2::FindAndConsumeN(&input, "(\\w+)\\s*(\\d+)", args, 2));
  EXPECT_EQ("three", word);
  EXPECT_EQ(4, n);
}
320
// With an alternation of capturing groups, only the group belonging to the
// branch that matched receives text; the other outputs are set to "".
TEST(RE2, MatchNumberPeculiarity) {
  RE2 alternatives("(foo)|(bar)|(baz)");
  std::string first;
  std::string second;
  std::string third;

  // First branch.
  ASSERT_TRUE(RE2::PartialMatch("foo", alternatives, &first, &second, &third));
  ASSERT_EQ(first, "foo");
  ASSERT_EQ(second, "");
  ASSERT_EQ(third, "");

  // Second branch.
  ASSERT_TRUE(RE2::PartialMatch("bar", alternatives, &first, &second, &third));
  ASSERT_EQ(first, "");
  ASSERT_EQ(second, "bar");
  ASSERT_EQ(third, "");

  // Third branch.
  ASSERT_TRUE(RE2::PartialMatch("baz", alternatives, &first, &second, &third));
  ASSERT_EQ(first, "");
  ASSERT_EQ(second, "");
  ASSERT_EQ(third, "baz");

  // No branch matches.
  ASSERT_FALSE(RE2::PartialMatch("f", alternatives, &first, &second, &third));

  // A group in a branch that did not participate yields an empty string.
  std::string unused_group;
  ASSERT_TRUE(RE2::FullMatch("hello", "(foo)|hello", &unused_group));
  ASSERT_EQ(unused_group, "");
}
345
// Exercises the low-level RE2::Match interface with a submatch array, then
// checks that PartialMatch extracts the same pieces into typed outputs.
TEST(RE2, Match) {
  RE2 host_port("((\\w+):([0-9]+))");  // extracts host and port
  StringPiece submatch[4];

  // No match.
  StringPiece text = "zyzzyva";
  ASSERT_FALSE(host_port.Match(text, 0, text.size(), RE2::UNANCHORED,
                               submatch, arraysize(submatch)));

  // Matches and extracts.
  text = "a chrisr:9000 here";
  ASSERT_TRUE(host_port.Match(text, 0, text.size(), RE2::UNANCHORED,
                              submatch, arraysize(submatch)));
  ASSERT_EQ(submatch[0], "chrisr:9000");  // whole match
  ASSERT_EQ(submatch[1], "chrisr:9000");  // outer group
  ASSERT_EQ(submatch[2], "chrisr");
  ASSERT_EQ(submatch[3], "9000");

  std::string all, host;
  int port;
  ASSERT_TRUE(
      RE2::PartialMatch("a chrisr:9000 here", host_port, &all, &host, &port));
  ASSERT_EQ(all, "chrisr:9000");
  ASSERT_EQ(host, "chrisr");
  ASSERT_EQ(port, 9000);
}
371
TestRecursion(int size,const char * pattern)372 static void TestRecursion(int size, const char* pattern) {
373 // Fill up a string repeating the pattern given
374 std::string domain;
375 domain.resize(size);
376 size_t patlen = strlen(pattern);
377 for (int i = 0; i < size; i++) {
378 domain[i] = pattern[i % patlen];
379 }
380 // Just make sure it doesn't crash due to too much recursion.
381 RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet);
382 RE2::FullMatch(domain, re);
383 }
384
385 // A meta-quoted string, interpreted as a pattern, should always match
386 // the original unquoted string.
TestQuoteMeta(const std::string & unquoted,const RE2::Options & options=RE2::DefaultOptions)387 static void TestQuoteMeta(const std::string& unquoted,
388 const RE2::Options& options = RE2::DefaultOptions) {
389 std::string quoted = RE2::QuoteMeta(unquoted);
390 RE2 re(quoted, options);
391 EXPECT_TRUE(RE2::FullMatch(unquoted, re))
392 << "Unquoted='" << unquoted << "', quoted='" << quoted << "'.";
393 }
394
395 // A meta-quoted string, interpreted as a pattern, should always match
396 // the original unquoted string.
NegativeTestQuoteMeta(const std::string & unquoted,const std::string & should_not_match,const RE2::Options & options=RE2::DefaultOptions)397 static void NegativeTestQuoteMeta(
398 const std::string& unquoted, const std::string& should_not_match,
399 const RE2::Options& options = RE2::DefaultOptions) {
400 std::string quoted = RE2::QuoteMeta(unquoted);
401 RE2 re(quoted, options);
402 EXPECT_FALSE(RE2::FullMatch(should_not_match, re))
403 << "Unquoted='" << unquoted << "', quoted='" << quoted << "'.";
404 }
405
406 // Tests that quoted meta characters match their original strings,
407 // and that a few things that shouldn't match indeed do not.
TEST(QuoteMeta,Simple)408 TEST(QuoteMeta, Simple) {
409 TestQuoteMeta("foo");
410 TestQuoteMeta("foo.bar");
411 TestQuoteMeta("foo\\.bar");
412 TestQuoteMeta("[1-9]");
413 TestQuoteMeta("1.5-2.0?");
414 TestQuoteMeta("\\d");
415 TestQuoteMeta("Who doesn't like ice cream?");
416 TestQuoteMeta("((a|b)c?d*e+[f-h]i)");
417 TestQuoteMeta("((?!)xxx).*yyy");
418 TestQuoteMeta("([");
419 }
// For each row, the meta-quoted form of `unquoted` must not match `other`,
// i.e. quoting must strip the metacharacters of their regexp meaning.
TEST(QuoteMeta, SimpleNegative) {
  struct Case {
    const char* unquoted;
    const char* other;
  };
  static const Case kCases[] = {
    { "foo", "bar" },
    { "...", "bar" },
    { "\\.", "." },
    { "\\.", ".." },
    { "(a)", "a" },
    { "(a|b)", "a" },
    { "(a|b)", "(a)" },
    { "(a|b)", "a|b" },
    { "[0-9]", "0" },
    { "[0-9]", "0-9" },
    { "[0-9]", "[9]" },
    { "((?!)xxx)", "xxx" },
  };

  for (const Case& c : kCases)
    NegativeTestQuoteMeta(c.unquoted, c.other);
}
434
// A string containing the single byte 0xb2 must round-trip through
// QuoteMeta when the pattern is compiled with the Latin-1 options.
TEST(QuoteMeta, Latin1) {
  const std::string with_high_byte("3\xb2 = 9");
  TestQuoteMeta(with_high_byte, RE2::Latin1);
}
438
// Multi-byte UTF-8 sequences must round-trip through QuoteMeta, and the
// quoted form must not match a byte-by-byte backslash-escaped spelling.
TEST(QuoteMeta, UTF8) {
  static const char* const kLiterals[] = {
    "Plácido Domingo",
    "xyz",                 // No fancy utf8.
    "\xc2\xb0",            // 2-byte utf8 -- a degree symbol.
    "27\xc2\xb0 degrees",  // As a middle character.
    "\xe2\x80\xb3",        // 3-byte utf8 -- a double prime.
    "\xf0\x9d\x85\x9f",    // 4-byte utf8 -- a music note.
    "27\xc2\xb0",          // Interpreted as Latin-1, this should
                           // still work.
  };
  for (const char* literal : kLiterals)
    TestQuoteMeta(literal);

  // 2-byte utf8 -- a degree symbol.
  NegativeTestQuoteMeta("27\xc2\xb0", "27\\\xc2\\\xb0");
}
451
// Strings containing an embedded NUL byte must round-trip through QuoteMeta.
TEST(QuoteMeta, HasNull) {
  // string with one null character
  std::string subject(1, '\0');
  TestQuoteMeta(subject);
  NegativeTestQuoteMeta(subject, "");

  // Don't want null-followed-by-'1' to be interpreted as '\01'.
  subject.push_back('1');
  TestQuoteMeta(subject);
  NegativeTestQuoteMeta(subject, "\1");
}
465
// The reported program size must be positive and must grow from the simple
// pattern to the medium one to the complex one, for both the forward and
// the reverse program.
TEST(ProgramSize, BigProgram) {
  RE2 simple("simple regexp");
  RE2 medium("medium.*regexp");
  RE2 complex("complex.{1,128}regexp");

  // Forward programs.
  ASSERT_GT(simple.ProgramSize(), 0);
  ASSERT_GT(medium.ProgramSize(), simple.ProgramSize());
  ASSERT_GT(complex.ProgramSize(), medium.ProgramSize());

  // Reverse programs.
  ASSERT_GT(simple.ReverseProgramSize(), 0);
  ASSERT_GT(medium.ReverseProgramSize(), simple.ReverseProgramSize());
  ASSERT_GT(complex.ReverseProgramSize(), medium.ReverseProgramSize());
}
479
// Checks the fanout histogram of four patterns that differ only in their
// repetition count (1, 10, 100, 1000). Each call returns the index of the
// largest non-empty bucket and fills `buckets`; the comments give the
// expected bucket index and its element count.
TEST(ProgramFanout, BigProgram) {
  RE2 rep1("(?:(?:(?:(?:(?:.)?){1})*)+)");
  RE2 rep10("(?:(?:(?:(?:(?:.)?){10})*)+)");
  RE2 rep100("(?:(?:(?:(?:(?:.)?){100})*)+)");
  RE2 rep1000("(?:(?:(?:(?:(?:.)?){1000})*)+)");

  std::vector<int> buckets;

  // Forward programs.

  // 3 is the largest non-empty bucket and has 2 elements.
  ASSERT_EQ(3, rep1.ProgramFanout(&buckets));
  ASSERT_EQ(2, buckets[3]);

  // 6 is the largest non-empty bucket and has 11 elements.
  ASSERT_EQ(6, rep10.ProgramFanout(&buckets));
  ASSERT_EQ(11, buckets[6]);

  // 9 is the largest non-empty bucket and has 101 elements.
  ASSERT_EQ(9, rep100.ProgramFanout(&buckets));
  ASSERT_EQ(101, buckets[9]);

  // 13 is the largest non-empty bucket and has 1001 elements.
  ASSERT_EQ(13, rep1000.ProgramFanout(&buckets));
  ASSERT_EQ(1001, buckets[13]);

  // Reverse programs.

  // 2 is the largest non-empty bucket and has 2 elements.
  ASSERT_EQ(2, rep1.ReverseProgramFanout(&buckets));
  ASSERT_EQ(2, buckets[2]);

  // 5 is the largest non-empty bucket and has 11 elements.
  ASSERT_EQ(5, rep10.ReverseProgramFanout(&buckets));
  ASSERT_EQ(11, buckets[5]);

  // 9 is the largest non-empty bucket and has 101 elements.
  ASSERT_EQ(9, rep100.ReverseProgramFanout(&buckets));
  ASSERT_EQ(101, buckets[9]);

  // 12 is the largest non-empty bucket and has 1001 elements.
  ASSERT_EQ(12, rep1000.ReverseProgramFanout(&buckets));
  ASSERT_EQ(1001, buckets[12]);
}
520
521 // Issue 956519: handling empty character sets was
522 // causing NULL dereference. This tests a few empty character sets.
523 // (The way to get an empty character set is to negate a full one.)
TEST(EmptyCharset,Fuzz)524 TEST(EmptyCharset, Fuzz) {
525 static const char *empties[] = {
526 "[^\\S\\s]",
527 "[^\\S[:space:]]",
528 "[^\\D\\d]",
529 "[^\\D[:digit:]]"
530 };
531 for (size_t i = 0; i < arraysize(empties); i++)
532 ASSERT_FALSE(RE2(empties[i]).Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0));
533 }
534
535 // Bitstate assumes that kInstFail instructions in
536 // alternations or capture groups have been "compiled away".
TEST(EmptyCharset,BitstateAssumptions)537 TEST(EmptyCharset, BitstateAssumptions) {
538 // Captures trigger use of Bitstate.
539 static const char *nop_empties[] = {
540 "((((()))))" "[^\\S\\s]?",
541 "((((()))))" "([^\\S\\s])?",
542 "((((()))))" "([^\\S\\s]|[^\\S\\s])?",
543 "((((()))))" "(([^\\S\\s]|[^\\S\\s])|)"
544 };
545 StringPiece group[6];
546 for (size_t i = 0; i < arraysize(nop_empties); i++)
547 ASSERT_TRUE(RE2(nop_empties[i]).Match("", 0, 0, RE2::UNANCHORED, group, 6));
548 }
549
550 // Test that named groups work correctly.
TEST(Capture,NamedGroups)551 TEST(Capture, NamedGroups) {
552 {
553 RE2 re("(hello world)");
554 ASSERT_EQ(re.NumberOfCapturingGroups(), 1);
555 const std::map<std::string, int>& m = re.NamedCapturingGroups();
556 ASSERT_EQ(m.size(), 0);
557 }
558
559 {
560 RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))");
561 ASSERT_EQ(re.NumberOfCapturingGroups(), 6);
562 const std::map<std::string, int>& m = re.NamedCapturingGroups();
563 ASSERT_EQ(m.size(), 4);
564 ASSERT_EQ(m.find("A")->second, 1);
565 ASSERT_EQ(m.find("B")->second, 2);
566 ASSERT_EQ(m.find("C")->second, 3);
567 ASSERT_EQ(m.find("D")->second, 6); // $4 and $5 are anonymous
568 }
569 }
570
// Matches a pattern with two named groups through FullMatchN and then looks
// the captured text up by group name.
TEST(RE2, CapturedGroupTest) {
  RE2 re("directions from (?P<S>.*) to (?P<D>.*)");
  int num_groups = re.NumberOfCapturingGroups();
  // Fatal on mismatch: num_groups is passed to FullMatchN below as the
  // number of entries of matches[] to use.
  ASSERT_EQ(2, num_groups);
  std::string args[4];
  RE2::Arg arg0(&args[0]);
  RE2::Arg arg1(&args[1]);
  RE2::Arg arg2(&args[2]);
  RE2::Arg arg3(&args[3]);

  const RE2::Arg* const matches[4] = {&arg0, &arg1, &arg2, &arg3};
  EXPECT_TRUE(RE2::FullMatchN("directions from mountain view to san jose",
                              re, matches, num_groups));
  const std::map<std::string, int>& named_groups = re.NamedCapturingGroups();

  // Fatal on failure: the iterators are dereferenced right below, which
  // would be undefined behaviour for end().
  const std::map<std::string, int>::const_iterator source =
      named_groups.find("S");
  const std::map<std::string, int>::const_iterator destination =
      named_groups.find("D");
  ASSERT_TRUE(source != named_groups.end());
  ASSERT_TRUE(destination != named_groups.end());

  // The named group index is 1-based.
  int source_group_index = source->second;
  int destination_group_index = destination->second;
  // Fatal on mismatch: the indices are used to subscript args[] below.
  ASSERT_EQ(1, source_group_index);
  ASSERT_EQ(2, destination_group_index);

  // The args is zero-based.
  EXPECT_EQ("mountain view", args[source_group_index - 1]);
  EXPECT_EQ("san jose", args[destination_group_index - 1]);
}
598
// FullMatch without output arguments: the pattern must cover the entire text.
TEST(RE2, FullMatchWithNoArgs) {
  ASSERT_TRUE(RE2::FullMatch("h", "h"));
  ASSERT_TRUE(RE2::FullMatch("hello", "hello"));

  const char* const h_to_o = "h.*o";
  ASSERT_TRUE(RE2::FullMatch("hello", h_to_o));
  // Must be anchored at front
  ASSERT_FALSE(RE2::FullMatch("othello", h_to_o));
  // Must be anchored at end
  ASSERT_FALSE(RE2::FullMatch("hello!", h_to_o));
}
606
// PartialMatch without output arguments: the pattern may match anywhere
// inside the text.
TEST(RE2, PartialMatch) {
  ASSERT_TRUE(RE2::PartialMatch("x", "x"));

  const char* const h_to_o = "h.*o";
  ASSERT_TRUE(RE2::PartialMatch("hello", h_to_o));
  ASSERT_TRUE(RE2::PartialMatch("othello", h_to_o));
  ASSERT_TRUE(RE2::PartialMatch("hello!", h_to_o));

  // Deeply nested capturing groups with no output arguments supplied.
  ASSERT_TRUE(
      RE2::PartialMatch("x", "((((((((((((((((((((x))))))))))))))))))))"));
}
614
// Exercises RE2::PartialMatchN with zero, one and two output arguments.
TEST(RE2, PartialMatchN) {
  RE2::Arg argv[2];
  const RE2::Arg* const args[2] = { &argv[0], &argv[1] };

  // 0 arg
  EXPECT_TRUE(RE2::PartialMatchN("hello", "e.*o", args, 0));
  EXPECT_FALSE(RE2::PartialMatchN("othello", "a.*o", args, 0));

  // 1 arg
  // Initialized because the EXPECT_* checks are non-fatal: if a match
  // fails, the comparison after it still runs and must not read an
  // indeterminate value.
  int i = 0;
  argv[0] = &i;
  EXPECT_TRUE(RE2::PartialMatchN("1001 nights", "(\\d+)", args, 1));
  EXPECT_EQ(1001, i);
  EXPECT_FALSE(RE2::PartialMatchN("three", "(\\d+)", args, 1));

  // Multi-arg
  std::string s;
  argv[1] = &s;
  EXPECT_TRUE(RE2::PartialMatchN("answer: 42:life", "(\\d+):(\\w+)", args, 2));
  EXPECT_EQ(42, i);
  EXPECT_EQ("life", s);
  EXPECT_FALSE(RE2::PartialMatchN("hi1", "(\\w+)(1)", args, 2));
}
638
// FullMatch with no output arguments on an all-digit subject.
TEST(RE2, FullMatchZeroArg) {
  const char* const digits = "\\d+";
  ASSERT_TRUE(RE2::FullMatch("1001", digits));
}
643
// FullMatch with a single int output: successful conversions, an empty
// capture, and a number far too large for an int.
TEST(RE2, FullMatchOneArg) {
  int value;

  ASSERT_TRUE(RE2::FullMatch("1001", "(\\d+)", &value));
  ASSERT_EQ(value, 1001);

  ASSERT_TRUE(RE2::FullMatch("-123", "(-?\\d+)", &value));
  ASSERT_EQ(value, -123);

  // The group captures the empty string, which is expected to be rejected.
  ASSERT_FALSE(RE2::FullMatch("10", "()\\d+", &value));

  // Forty digits: expected to be rejected for an int output.
  ASSERT_FALSE(RE2::FullMatch("1234567890123456789012345678901234567890",
                              "(\\d+)", &value));
}
656
// Only the captured digits are converted, even when the capture is
// surrounded by further digits in the subject.
TEST(RE2, FullMatchIntegerArg) {
  int value;

  // Full matches.
  ASSERT_TRUE(RE2::FullMatch("1234", "1(\\d*)4", &value));
  ASSERT_EQ(value, 23);

  ASSERT_TRUE(RE2::FullMatch("1234", "(\\d)\\d+", &value));
  ASSERT_EQ(value, 1);

  ASSERT_TRUE(RE2::FullMatch("-1234", "(-\\d)\\d+", &value));
  ASSERT_EQ(value, -1);

  // Partial matches.
  ASSERT_TRUE(RE2::PartialMatch("1234", "(\\d)", &value));
  ASSERT_EQ(value, 1);

  ASSERT_TRUE(RE2::PartialMatch("-1234", "(-\\d)", &value));
  ASSERT_EQ(value, -1);
}
672
// FullMatch with a std::string output receives the captured text.
TEST(RE2, FullMatchStringArg) {
  std::string middle;
  ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", &middle));

  const std::string expected("ell");
  ASSERT_EQ(middle, expected);
}
679
// FullMatch with a StringPiece output followed by an int output.
TEST(RE2, FullMatchStringPieceArg) {
  StringPiece name;
  int number;

  ASSERT_TRUE(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &name, &number));

  // The piece is compared by length and bytes.
  ASSERT_EQ(name.size(), 4);
  ASSERT_TRUE(memcmp(name.data(), "ruby", 4) == 0);
  ASSERT_EQ(number, 1234);
}
689
// FullMatch with a std::string output followed by an int output.
TEST(RE2, FullMatchMultiArg) {
  std::string name;
  int number;

  ASSERT_TRUE(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &name, &number));

  const std::string expected_name("ruby");
  ASSERT_EQ(name, expected_name);
  ASSERT_EQ(number, 1234);
}
698
// Exercises RE2::FullMatchN with zero, one and two output arguments.
TEST(RE2, FullMatchN) {
  RE2::Arg argv[2];
  const RE2::Arg* const args[2] = { &argv[0], &argv[1] };

  // 0 arg
  EXPECT_TRUE(RE2::FullMatchN("hello", "h.*o", args, 0));
  EXPECT_FALSE(RE2::FullMatchN("othello", "h.*o", args, 0));

  // 1 arg
  // Initialized because the EXPECT_* checks are non-fatal: if a match
  // fails, the comparison after it still runs and must not read an
  // indeterminate value.
  int i = 0;
  argv[0] = &i;
  EXPECT_TRUE(RE2::FullMatchN("1001", "(\\d+)", args, 1));
  EXPECT_EQ(1001, i);
  EXPECT_FALSE(RE2::FullMatchN("three", "(\\d+)", args, 1));

  // Multi-arg
  std::string s;
  argv[1] = &s;
  EXPECT_TRUE(RE2::FullMatchN("42:life", "(\\d+):(\\w+)", args, 2));
  EXPECT_EQ(42, i);
  EXPECT_EQ("life", s);
  EXPECT_FALSE(RE2::FullMatchN("hi1", "(\\w+)(1)", args, 2));
}
722
// A null output pointer in the middle of the argument list means "do not
// store this group"; the surrounding outputs must still be filled in.
TEST(RE2, FullMatchIgnoredArg) {
  const char* const three_groups = "(\\w+)(:)(\\d+)";
  std::string name;
  int number;

  // Old-school NULL should be ignored.
  ASSERT_TRUE(
      RE2::FullMatch("ruby:1234", three_groups, &name, (void*)NULL, &number));
  ASSERT_EQ(name, std::string("ruby"));
  ASSERT_EQ(number, 1234);

  // C++11 nullptr should also be ignored.
  ASSERT_TRUE(
      RE2::FullMatch("rubz:1235", three_groups, &name, nullptr, &number));
  ASSERT_EQ(name, std::string("rubz"));
  ASSERT_EQ(number, 1235);
}
738
// A null pointer of a concrete type stores nothing, but the captured text
// must still be parseable as that type for the match to succeed.
TEST(RE2, FullMatchTypedNullArg) {
  std::string prefix;

  // Ignore non-void* NULL arg
  ASSERT_TRUE(
      RE2::FullMatch("hello", "he(.*)lo", static_cast<char*>(nullptr)));
  ASSERT_TRUE(
      RE2::FullMatch("hello", "h(.*)o", static_cast<std::string*>(nullptr)));
  ASSERT_TRUE(
      RE2::FullMatch("hello", "h(.*)o", static_cast<StringPiece*>(nullptr)));
  ASSERT_TRUE(RE2::FullMatch("1234", "(.*)", static_cast<int*>(nullptr)));
  ASSERT_TRUE(RE2::FullMatch("1234567890123456", "(.*)",
                             static_cast<long long*>(nullptr)));
  ASSERT_TRUE(RE2::FullMatch("123.4567890123456", "(.*)",
                             static_cast<double*>(nullptr)));
  ASSERT_TRUE(RE2::FullMatch("123.4567890123456", "(.*)",
                             static_cast<float*>(nullptr)));

  // Fail on non-void* NULL arg if the match doesn't parse for the given type.
  ASSERT_FALSE(RE2::FullMatch("hello", "h(.*)lo", &prefix,
                              static_cast<char*>(nullptr)));
  ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", static_cast<int*>(nullptr)));
  ASSERT_FALSE(
      RE2::FullMatch("1234567890123456", "(.*)", static_cast<int*>(nullptr)));
  ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", static_cast<double*>(nullptr)));
  ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", static_cast<float*>(nullptr)));
}
758
759 // Check that numeric parsing code does not read past the end of
760 // the number being parsed.
761 // This implementation requires mmap(2) et al. and thus cannot
762 // be used unless they are available.
TEST(RE2,NULTerminated)763 TEST(RE2, NULTerminated) {
764 #if defined(_POSIX_MAPPED_FILES) && _POSIX_MAPPED_FILES > 0
765 char *v;
766 int x;
767 long pagesize = sysconf(_SC_PAGE_SIZE);
768
769 #ifndef MAP_ANONYMOUS
770 #define MAP_ANONYMOUS MAP_ANON
771 #endif
772 v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE,
773 MAP_ANONYMOUS|MAP_PRIVATE, -1, 0));
774 ASSERT_TRUE(v != reinterpret_cast<char*>(-1));
775 LOG(INFO) << "Memory at " << (void*)v;
776 ASSERT_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno;
777 v[pagesize - 1] = '1';
778
779 x = 0;
780 ASSERT_TRUE(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x));
781 ASSERT_EQ(x, 1);
782 #endif
783 }
784
// Converts captured text into each supported character and integer type,
// checking ordinary values, the extremes of each type, and values just
// outside the representable range (which must make the match fail).
TEST(RE2, FullMatchTypeTests) {
  // Used to build zero-padded numeric subjects.
  const std::string zeros(1000, '0');

  const char* const kSigned = "(-?\\d+)";
  const char* const kUnsigned = "(\\d+)";

  // char
  {
    char ch;
    ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &ch));
    ASSERT_EQ(ch, 'H');
  }

  // unsigned char
  {
    unsigned char ch;
    ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &ch));
    ASSERT_EQ(ch, static_cast<unsigned char>('H'));
  }

  // int16_t
  {
    int16_t parsed;
    ASSERT_TRUE(RE2::FullMatch("100", kSigned, &parsed));
    ASSERT_EQ(parsed, 100);
    ASSERT_TRUE(RE2::FullMatch("-100", kSigned, &parsed));
    ASSERT_EQ(parsed, -100);
    ASSERT_TRUE(RE2::FullMatch("32767", kSigned, &parsed));
    ASSERT_EQ(parsed, 32767);
    ASSERT_TRUE(RE2::FullMatch("-32768", kSigned, &parsed));
    ASSERT_EQ(parsed, -32768);
    ASSERT_FALSE(RE2::FullMatch("-32769", kSigned, &parsed));
    ASSERT_FALSE(RE2::FullMatch("32768", kSigned, &parsed));
  }

  // uint16_t
  {
    uint16_t parsed;
    ASSERT_TRUE(RE2::FullMatch("100", kUnsigned, &parsed));
    ASSERT_EQ(parsed, 100);
    ASSERT_TRUE(RE2::FullMatch("32767", kUnsigned, &parsed));
    ASSERT_EQ(parsed, 32767);
    ASSERT_TRUE(RE2::FullMatch("65535", kUnsigned, &parsed));
    ASSERT_EQ(parsed, 65535);
    ASSERT_FALSE(RE2::FullMatch("65536", kUnsigned, &parsed));
  }

  // int32_t
  {
    int32_t parsed;
    static const int32_t kMax = INT32_C(0x7fffffff);
    static const int32_t kMin = -kMax - 1;

    ASSERT_TRUE(RE2::FullMatch("100", kSigned, &parsed));
    ASSERT_EQ(parsed, 100);
    ASSERT_TRUE(RE2::FullMatch("-100", kSigned, &parsed));
    ASSERT_EQ(parsed, -100);
    ASSERT_TRUE(RE2::FullMatch("2147483647", kSigned, &parsed));
    ASSERT_EQ(parsed, kMax);
    ASSERT_TRUE(RE2::FullMatch("-2147483648", kSigned, &parsed));
    ASSERT_EQ(parsed, kMin);
    ASSERT_FALSE(RE2::FullMatch("-2147483649", kSigned, &parsed));
    ASSERT_FALSE(RE2::FullMatch("2147483648", kSigned, &parsed));

    // Same extremes with a long run of leading zeros.
    ASSERT_TRUE(RE2::FullMatch(zeros + "2147483647", kSigned, &parsed));
    ASSERT_EQ(parsed, kMax);
    ASSERT_TRUE(RE2::FullMatch("-" + zeros + "2147483648", kSigned, &parsed));
    ASSERT_EQ(parsed, kMin);
    ASSERT_FALSE(RE2::FullMatch("-" + zeros + "2147483649", kSigned, &parsed));

    // C-style radix prefix.
    ASSERT_TRUE(RE2::FullMatch("0x7fffffff", "(.*)", RE2::CRadix(&parsed)));
    ASSERT_EQ(parsed, kMax);
    ASSERT_FALSE(RE2::FullMatch("000x7fffffff", "(.*)", RE2::CRadix(&parsed)));
  }

  // uint32_t
  {
    uint32_t parsed;
    static const uint32_t kMax = UINT32_C(0xffffffff);

    ASSERT_TRUE(RE2::FullMatch("100", kUnsigned, &parsed));
    ASSERT_EQ(parsed, 100);
    ASSERT_TRUE(RE2::FullMatch("4294967295", kUnsigned, &parsed));
    ASSERT_EQ(parsed, kMax);
    ASSERT_FALSE(RE2::FullMatch("4294967296", kUnsigned, &parsed));
    ASSERT_FALSE(RE2::FullMatch("-1", kUnsigned, &parsed));

    ASSERT_TRUE(RE2::FullMatch(zeros + "4294967295", kUnsigned, &parsed));
    ASSERT_EQ(parsed, kMax);
  }

  // int64_t
  {
    int64_t parsed;
    static const int64_t kMax = INT64_C(0x7fffffffffffffff);
    static const int64_t kMin = -kMax - 1;
    std::string text;

    ASSERT_TRUE(RE2::FullMatch("100", kSigned, &parsed));
    ASSERT_EQ(parsed, 100);
    ASSERT_TRUE(RE2::FullMatch("-100", kSigned, &parsed));
    ASSERT_EQ(parsed, -100);

    text = std::to_string(kMax);
    ASSERT_TRUE(RE2::FullMatch(text, kSigned, &parsed));
    ASSERT_EQ(parsed, kMax);

    text = std::to_string(kMin);
    ASSERT_TRUE(RE2::FullMatch(text, kSigned, &parsed));
    ASSERT_EQ(parsed, kMin);

    // Bump the last digit to step one past the maximum.
    text = std::to_string(kMax);
    ASSERT_NE(text.back(), '9');
    ++text.back();
    ASSERT_FALSE(RE2::FullMatch(text, kSigned, &parsed));

    // Bump the last digit to step one past the minimum.
    text = std::to_string(kMin);
    ASSERT_NE(text.back(), '9');
    ++text.back();
    ASSERT_FALSE(RE2::FullMatch(text, kSigned, &parsed));
  }

  // uint64_t
  {
    uint64_t parsed;
    int64_t parsed_signed;
    static const uint64_t kMax = UINT64_C(0xffffffffffffffff);
    std::string text;

    ASSERT_TRUE(RE2::FullMatch("100", kSigned, &parsed));
    ASSERT_EQ(parsed, 100);
    ASSERT_TRUE(RE2::FullMatch("-100", kSigned, &parsed_signed));
    ASSERT_EQ(parsed_signed, -100);

    text = std::to_string(kMax);
    ASSERT_TRUE(RE2::FullMatch(text, kSigned, &parsed));
    ASSERT_EQ(parsed, kMax);

    // Bump the last digit to step one past the maximum.
    ASSERT_NE(text.back(), '9');
    ++text.back();
    ASSERT_FALSE(RE2::FullMatch(text, kSigned, &parsed));
  }
}
887
// Converts captured text into float and double and compares the result with
// the value the compiler assigns to the same literal.
TEST(RE2, FloatingPointFullMatchTypes) {
  // Used to build zero-padded numeric subjects.
  const std::string zeros(1000, '0');
  const char* const kAnything = "(.*)";

  // float
  {
    float parsed;
    ASSERT_TRUE(RE2::FullMatch("100", kAnything, &parsed));
    ASSERT_EQ(parsed, 100);
    ASSERT_TRUE(RE2::FullMatch("-100.", kAnything, &parsed));
    ASSERT_EQ(parsed, -100);
    ASSERT_TRUE(RE2::FullMatch("1e23", kAnything, &parsed));
    ASSERT_EQ(parsed, float(1e23));
    ASSERT_TRUE(RE2::FullMatch(" 100", kAnything, &parsed));
    ASSERT_EQ(parsed, 100);

    ASSERT_TRUE(RE2::FullMatch(zeros + "1e23", kAnything, &parsed));
    ASSERT_EQ(parsed, float(1e23));

    // 6700000000081920.1 is an edge case.
    // 6700000000081920 is exactly halfway between
    // two float32s, so the .1 should make it round up.
    // However, the .1 is outside the precision possible with
    // a float64: the nearest float64 is 6700000000081920.
    // So if the code uses strtod and then converts to float32,
    // round-to-even will make it round down instead of up.
    // To pass the test, the parser must call strtof directly.
    // This test case is carefully chosen to use only a 17-digit
    // number, since C does not guarantee to get the correctly
    // rounded answer for strtod and strtof unless the input is
    // short.
    //
    // This is known to fail on Cygwin and MinGW due to a broken
    // implementation of strtof(3). And apparently MSVC too. Sigh.
#if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
    ASSERT_TRUE(RE2::FullMatch("0.1", kAnything, &parsed));
    ASSERT_EQ(parsed, 0.1f) << StringPrintf("%.8g != %.8g", parsed, 0.1f);
    ASSERT_TRUE(RE2::FullMatch("6700000000081920.1", kAnything, &parsed));
    ASSERT_EQ(parsed, 6700000000081920.1f)
        << StringPrintf("%.8g != %.8g", parsed, 6700000000081920.1f);
#endif
  }

  // double
  {
    double parsed;
    ASSERT_TRUE(RE2::FullMatch("100", kAnything, &parsed));
    ASSERT_EQ(parsed, 100);
    ASSERT_TRUE(RE2::FullMatch("-100.", kAnything, &parsed));
    ASSERT_EQ(parsed, -100);
    ASSERT_TRUE(RE2::FullMatch("1e23", kAnything, &parsed));
    ASSERT_EQ(parsed, 1e23);
    ASSERT_TRUE(RE2::FullMatch(zeros + "1e23", kAnything, &parsed));
    ASSERT_EQ(parsed, double(1e23));

    ASSERT_TRUE(RE2::FullMatch("0.1", kAnything, &parsed));
    ASSERT_EQ(parsed, 0.1) << StringPrintf("%.17g != %.17g", parsed, 0.1);
    ASSERT_TRUE(RE2::FullMatch("1.00000005960464485", kAnything, &parsed));
    ASSERT_EQ(parsed, 1.0000000596046448)
        << StringPrintf("%.17g != %.17g", parsed, 1.0000000596046448);
  }
}
938
// FullMatch must be anchored at both ends of the text.
TEST(RE2, FullMatchAnchored) {
  int number;

  // Text outside the digits is not covered by the pattern: no match.
  ASSERT_FALSE(RE2::FullMatch("x1001", "(\\d+)", &number));
  ASSERT_FALSE(RE2::FullMatch("1001x", "(\\d+)", &number));

  // Once the pattern accounts for the extra 'x', the digits are extracted.
  ASSERT_TRUE(RE2::FullMatch("x1001", "x(\\d+)", &number));
  ASSERT_EQ(number, 1001);

  ASSERT_TRUE(RE2::FullMatch("1001x", "(\\d+)x", &number));
  ASSERT_EQ(number, 1001);
}
947
// Exercises an open-ended counted repetition ("{5,}") under FullMatch.
TEST(RE2, FullMatchBraces) {
  const char* const kFiveOrMore = "[0-9a-f+.-]{5,}";

  ASSERT_TRUE(RE2::FullMatch("0abcd", kFiveOrMore));   // exactly five
  ASSERT_TRUE(RE2::FullMatch("0abcde", kFiveOrMore));  // more than five
  ASSERT_FALSE(RE2::FullMatch("0abc", kFiveOrMore));   // only four
}
954
// Exercises FullMatch against an alternation that mixes literals with a
// character class.
TEST(RE2, Complicated) {
  const char* const kAlternation = "foo|bar|[A-Z]";

  ASSERT_TRUE(RE2::FullMatch("foo", kAlternation));
  ASSERT_TRUE(RE2::FullMatch("bar", kAlternation));
  ASSERT_TRUE(RE2::FullMatch("X", kAlternation));
  ASSERT_FALSE(RE2::FullMatch("XY", kAlternation));
}
962
// FullMatch has to behave as if a '$' were tacked onto the pattern
// internally, whether or not the pattern already ends in one.
TEST(RE2, FullMatchEnd) {
  const char* const kWithoutDollar = "fo|foo";
  const char* const kWithDollar = "fo|foo$";

  ASSERT_TRUE(RE2::FullMatch("fo", kWithoutDollar));
  ASSERT_TRUE(RE2::FullMatch("foo", kWithoutDollar));
  ASSERT_TRUE(RE2::FullMatch("fo", kWithDollar));
  ASSERT_TRUE(RE2::FullMatch("foo", kWithDollar));
  ASSERT_TRUE(RE2::FullMatch("foo", "foo$"));
  ASSERT_FALSE(RE2::FullMatch("foo$bar", "foo\\$"));
  ASSERT_FALSE(RE2::FullMatch("fox", "fo|bar"));

  // Deliberately disabled. Enable this if the handling of '$' is ever
  // changed so that it no longer matches a trailing newline.
  if (false) {
    // Guards against pcre's special treatment of a '\n' at the end of
    // the string when matching '$'.
    ASSERT_FALSE(RE2::PartialMatch("foo\n", "foo$"));
  }
}
981
// Checks that FullMatch fills in the right captures for a range of
// argument counts: 0 through 7 destinations, and then 16 at once.
//
// The destination array is zeroed before every stage so that a value left
// over from an earlier stage can never satisfy a later assertion. The whole
// array has to be cleared for that to hold, hence sizeof(a): sizeof(0) is
// just the size of an int and would only reset a[0].
TEST(RE2, FullMatchArgCount) {
  int a[16];

  // No destinations at all.
  ASSERT_TRUE(RE2::FullMatch("", ""));

  // One destination.
  memset(a, 0, sizeof(a));
  ASSERT_TRUE(RE2::FullMatch("1", "(\\d){1}", &a[0]));
  ASSERT_EQ(a[0], 1);

  // Two destinations.
  memset(a, 0, sizeof(a));
  ASSERT_TRUE(RE2::FullMatch("12", "(\\d)(\\d)", &a[0], &a[1]));
  ASSERT_EQ(a[0], 1);
  ASSERT_EQ(a[1], 2);

  // Three destinations.
  memset(a, 0, sizeof(a));
  ASSERT_TRUE(RE2::FullMatch("123", "(\\d)(\\d)(\\d)", &a[0], &a[1], &a[2]));
  ASSERT_EQ(a[0], 1);
  ASSERT_EQ(a[1], 2);
  ASSERT_EQ(a[2], 3);

  // Four destinations.
  memset(a, 0, sizeof(a));
  ASSERT_TRUE(RE2::FullMatch("1234", "(\\d)(\\d)(\\d)(\\d)", &a[0], &a[1],
                             &a[2], &a[3]));
  ASSERT_EQ(a[0], 1);
  ASSERT_EQ(a[1], 2);
  ASSERT_EQ(a[2], 3);
  ASSERT_EQ(a[3], 4);

  // Five destinations.
  memset(a, 0, sizeof(a));
  ASSERT_TRUE(RE2::FullMatch("12345", "(\\d)(\\d)(\\d)(\\d)(\\d)", &a[0], &a[1],
                             &a[2], &a[3], &a[4]));
  ASSERT_EQ(a[0], 1);
  ASSERT_EQ(a[1], 2);
  ASSERT_EQ(a[2], 3);
  ASSERT_EQ(a[3], 4);
  ASSERT_EQ(a[4], 5);

  // Six destinations.
  memset(a, 0, sizeof(a));
  ASSERT_TRUE(RE2::FullMatch("123456", "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)", &a[0],
                             &a[1], &a[2], &a[3], &a[4], &a[5]));
  ASSERT_EQ(a[0], 1);
  ASSERT_EQ(a[1], 2);
  ASSERT_EQ(a[2], 3);
  ASSERT_EQ(a[3], 4);
  ASSERT_EQ(a[4], 5);
  ASSERT_EQ(a[5], 6);

  // Seven destinations.
  memset(a, 0, sizeof(a));
  ASSERT_TRUE(RE2::FullMatch("1234567", "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
                             &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6]));
  ASSERT_EQ(a[0], 1);
  ASSERT_EQ(a[1], 2);
  ASSERT_EQ(a[2], 3);
  ASSERT_EQ(a[3], 4);
  ASSERT_EQ(a[4], 5);
  ASSERT_EQ(a[5], 6);
  ASSERT_EQ(a[6], 7);

  // Sixteen destinations: every element of the array.
  memset(a, 0, sizeof(a));
  ASSERT_TRUE(RE2::FullMatch("1234567890123456",
                             "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
                             "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
                             &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6],
                             &a[7], &a[8], &a[9], &a[10], &a[11], &a[12],
                             &a[13], &a[14], &a[15]));
  ASSERT_EQ(a[0], 1);
  ASSERT_EQ(a[1], 2);
  ASSERT_EQ(a[2], 3);
  ASSERT_EQ(a[3], 4);
  ASSERT_EQ(a[4], 5);
  ASSERT_EQ(a[5], 6);
  ASSERT_EQ(a[6], 7);
  ASSERT_EQ(a[7], 8);
  ASSERT_EQ(a[8], 9);
  ASSERT_EQ(a[9], 0);
  ASSERT_EQ(a[10], 1);
  ASSERT_EQ(a[11], 2);
  ASSERT_EQ(a[12], 3);
  ASSERT_EQ(a[13], 4);
  ASSERT_EQ(a[14], 5);
  ASSERT_EQ(a[15], 6);
}
1064
// Covers the simple read-only accessors of RE2.
TEST(RE2, Accessors) {
  // pattern() hands back the string the RE2 was constructed from.
  {
    const std::string source = "http://([^/]+)/.*";
    const RE2 compiled(source);
    ASSERT_EQ(source, compiled.pattern());
  }

  // A valid pattern reports no error through any of the error accessors.
  {
    RE2 valid("foo");
    ASSERT_TRUE(valid.error().empty());  // Must have no error
    ASSERT_TRUE(valid.ok());
    ASSERT_EQ(valid.error_code(), RE2::NoError);
  }
}
1081
// Checks UTF-8 handling by matching the same bytes in UTF-8 mode (the
// default) and in Latin-1 (byte) mode.
TEST(RE2, UTF8) {
  // Subject: three Japanese characters (nihongo), spelled out as UTF-8 bytes.
  const char nihongo[] =
      "\xe6\x97\xa5"   // U+65E5
      "\xe6\x9c\xac"   // U+672C
      "\xe8\xaa\x9e";  // U+8A9E

  // Pattern: the middle character with a '.' on either side.
  const char dot_middle_dot[] =
      "."
      "\xe6\x9c\xac"  // U+672C
      ".";

  // Nine dots cover the text byte-wise, three dots cover it rune-wise.
  RE2 nine_bytes(".........", RE2::Latin1);
  ASSERT_TRUE(RE2::FullMatch(nihongo, nine_bytes));
  RE2 three_runes("...");
  ASSERT_TRUE(RE2::FullMatch(nihongo, three_runes));

  // '.' consumes a single byte in Latin-1 mode and a whole UTF-8
  // character otherwise.
  std::string captured;
  RE2 one_byte("(.)", RE2::Latin1);
  ASSERT_TRUE(RE2::PartialMatch(nihongo, one_byte, &captured));
  ASSERT_EQ(captured, std::string("\xe6"));
  RE2 one_rune("(.)");
  ASSERT_TRUE(RE2::PartialMatch(nihongo, one_rune, &captured));
  ASSERT_EQ(captured, std::string("\xe6\x97\xa5"));

  // Used as a pattern, the text matches itself in both modes.
  RE2 self_latin1(nihongo, RE2::Latin1);
  ASSERT_TRUE(RE2::FullMatch(nihongo, self_latin1));
  RE2 self_utf8(nihongo);
  ASSERT_TRUE(RE2::FullMatch(nihongo, self_utf8));

  // The dotted pattern covers the text only when '.' spans whole
  // characters, i.e. in UTF-8 mode.
  RE2 dotted_latin1(dot_middle_dot, RE2::Latin1);
  ASSERT_FALSE(RE2::FullMatch(nihongo, dotted_latin1));
  RE2 dotted_utf8(dot_middle_dot);
  ASSERT_TRUE(RE2::FullMatch(nihongo, dotted_utf8));
}
1126
// Ungreedy, UTF8 regular expressions must not match when they
// oughtn't -- see bug 82246.
TEST(RE2, UngreedyUTF8) {
  const std::string subject = "a aX";

  // Greedy variant: this code always worked.
  {
    const char* greedy = "\\w+X";
    RE2 latin1_re(greedy, RE2::Latin1);
    RE2 utf8_re(greedy);

    ASSERT_FALSE(RE2::FullMatch(subject, latin1_re));
    ASSERT_FALSE(RE2::FullMatch(subject, utf8_re));
  }

  // Ungreedy variant: same expectations, after confirming that the
  // Latin-1 compilation reported no error.
  {
    const char* ungreedy = "(?U)\\w+X";
    RE2 latin1_re(ungreedy, RE2::Latin1);
    ASSERT_EQ(latin1_re.error(), "");
    RE2 utf8_re(ungreedy);

    ASSERT_FALSE(RE2::FullMatch(subject, latin1_re));
    ASSERT_FALSE(RE2::FullMatch(subject, utf8_re));
  }
}
1151
// Each of these patterns must be rejected at construction time, i.e.
// leave the RE2 object in a not-ok() state.
TEST(RE2, Rejects) {
  static const char* const kRejected[] = {
    "a\\1",
    "a[x",
    "a[z-a]",
    "a[[:foobar:]]",
    "a(b",
    "a\\",
  };

  for (size_t i = 0; i < arraysize(kRejected); i++) {
    RE2 re(kRejected[i], RE2::Quiet);
    ASSERT_FALSE(re.ok());
  }
}
1177
// Bad or extreme patterns must be handled gracefully instead of crashing.
TEST(RE2, NoCrash) {
  // Matching against a regexp that failed to compile just reports no match.
  {
    RE2 broken("a\\", RE2::Quiet);
    ASSERT_FALSE(broken.ok());
    ASSERT_FALSE(RE2::PartialMatch("a\\b", broken));
  }

  // An enormous regexp is refused, and matching against it fails cleanly.
  {
    RE2 enormous("(((.{100}){100}){100}){100}", RE2::Quiet);
    ASSERT_FALSE(enormous.ok());
    ASSERT_FALSE(RE2::PartialMatch("aaa", enormous));
  }

  // A crazy regexp still compiles and runs.
  {
    RE2 crazy(".{512}x", RE2::Quiet);
    ASSERT_TRUE(crazy.ok());
    const std::string text = std::string(515, 'c') + "x";
    ASSERT_TRUE(RE2::PartialMatch(text, crazy));
  }
}
1203
// Checks that recursion is stopped.
// PCRE-legacy test -- RE2 itself has no recursion.
TEST(RE2, Recursion) {
  const int bytes = 15 * 1024;  // enough to crash PCRE

  static const char* const kPatterns[] = {".", "a", "a.", "ab.", "abc."};
  for (size_t i = 0; i < arraysize(kPatterns); i++)
    TestRecursion(bytes, kPatterns[i]);
}
1214
// Counted repetition should work when given tons of memory.
TEST(RE2, BigCountedRepetition) {
  RE2::Options generous;
  generous.set_max_mem(256<<20);

  RE2 re(".{512}x", generous);
  ASSERT_TRUE(re.ok());

  // 515 filler characters followed by the required 'x'.
  const std::string text = std::string(515, 'c') + "x";
  ASSERT_TRUE(RE2::PartialMatch(text, re));
}
1227
// Deep stack recursion test. Before pcre was patched, this input would
// overflow the stack and die with a segmentation violation.
// Another PCRE legacy test: RE2 doesn't recurse.
TEST(RE2, DeepRecursion) {
  // "x*", then 131072 'a' characters, then "*x".
  const std::string comment = "x*" + std::string(131072, 'a') + "*x";

  RE2 re("((?:\\s|xx.*\n|x[*](?:\n|.)*?[*]x)*)");
  ASSERT_TRUE(RE2::FullMatch(comment, re));
}
1240
1241 // Suggested by Josh Hyman. Failed when SearchOnePass was
1242 // not implementing case-folding.
TEST(CaseInsensitive,MatchAndConsume)1243 TEST(CaseInsensitive, MatchAndConsume) {
1244 std::string text = "A fish named *Wanda*";
1245 StringPiece sp(text);
1246 StringPiece result;
1247 EXPECT_TRUE(RE2::PartialMatch(text, "(?i)([wand]{5})", &result));
1248 EXPECT_TRUE(RE2::FindAndConsume(&sp, "(?i)([wand]{5})", &result));
1249 }
1250
1251 // RE2 should permit implicit conversions from string, StringPiece, const char*,
1252 // and C string literals.
TEST(RE2,ImplicitConversions)1253 TEST(RE2, ImplicitConversions) {
1254 std::string re_string(".");
1255 StringPiece re_stringpiece(".");
1256 const char* re_cstring = ".";
1257 EXPECT_TRUE(RE2::PartialMatch("e", re_string));
1258 EXPECT_TRUE(RE2::PartialMatch("e", re_stringpiece));
1259 EXPECT_TRUE(RE2::PartialMatch("e", re_cstring));
1260 EXPECT_TRUE(RE2::PartialMatch("e", "."));
1261 }
1262
1263 // Bugs introduced by 8622304
TEST(RE2,CL8622304)1264 TEST(RE2, CL8622304) {
1265 // reported by ingow
1266 std::string dir;
1267 EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])")); // ok
1268 EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])", &dir)); // fails
1269
1270 // reported by jacobsa
1271 std::string key, val;
1272 EXPECT_TRUE(RE2::PartialMatch("bar:1,0x2F,030,4,5;baz:true;fooby:false,true",
1273 "(\\w+)(?::((?:[^;\\\\]|\\\\.)*))?;?",
1274 &key,
1275 &val));
1276 EXPECT_EQ(key, "bar");
1277 EXPECT_EQ(val, "1,0x2F,030,4,5");
1278 }
1279
1280 // Check that RE2 returns correct regexp pieces on error.
1281 // In particular, make sure it returns whole runes
1282 // and that it always reports invalid UTF-8.
1283 // Also check that Perl error flag piece is big enough.
1284 static struct ErrorTest {
1285 const char *regexp;
1286 RE2::ErrorCode error_code;
1287 const char *error_arg;
1288 } error_tests[] = {
1289 { "ab\\αcd", RE2::ErrorBadEscape, "\\α" },
1290 { "ef\\x☺01", RE2::ErrorBadEscape, "\\x☺0" },
1291 { "gh\\x1☺01", RE2::ErrorBadEscape, "\\x1☺" },
1292 { "ij\\x1", RE2::ErrorBadEscape, "\\x1" },
1293 { "kl\\x", RE2::ErrorBadEscape, "\\x" },
1294 { "uv\\x{0000☺}", RE2::ErrorBadEscape, "\\x{0000☺" },
1295 { "wx\\p{ABC", RE2::ErrorBadCharRange, "\\p{ABC" },
1296 // used to return (?s but the error is X
1297 { "yz(?smiUX:abc)", RE2::ErrorBadPerlOp, "(?smiUX" },
1298 { "aa(?sm☺i", RE2::ErrorBadPerlOp, "(?sm☺" },
1299 { "bb[abc", RE2::ErrorMissingBracket, "[abc" },
1300 { "abc(def", RE2::ErrorMissingParen, "abc(def" },
1301 { "abc)def", RE2::ErrorUnexpectedParen, "abc)def" },
1302
1303 // no argument string returned for invalid UTF-8
1304 { "mn\\x1\377", RE2::ErrorBadUTF8, "" },
1305 { "op\377qr", RE2::ErrorBadUTF8, "" },
1306 { "st\\x{00000\377", RE2::ErrorBadUTF8, "" },
1307 { "zz\\p{\377}", RE2::ErrorBadUTF8, "" },
1308 { "zz\\x{00\377}", RE2::ErrorBadUTF8, "" },
1309 { "zz(?P<name\377>abc)", RE2::ErrorBadUTF8, "" },
1310 };
TEST(RE2,ErrorCodeAndArg)1311 TEST(RE2, ErrorCodeAndArg) {
1312 for (size_t i = 0; i < arraysize(error_tests); i++) {
1313 RE2 re(error_tests[i].regexp, RE2::Quiet);
1314 EXPECT_FALSE(re.ok());
1315 EXPECT_EQ(re.error_code(), error_tests[i].error_code) << re.error();
1316 EXPECT_EQ(re.error_arg(), error_tests[i].error_arg) << re.error();
1317 }
1318 }
1319
// Cases for "never match \n" mode. `match` is the text the first
// capturing group is expected to yield, or NULL when the pattern is
// expected not to match at all.
static struct NeverTest {
  const char* regexp;
  const char* text;
  const char* match;
} never_tests[] = {
  { "(.*)", "abc\ndef\nghi\n", "abc" },
  { "(?s)(abc.*def)", "abc\ndef\n", NULL },
  { "(abc(.|\n)*def)", "abc\ndef\n", NULL },
  { "(abc[^x]*def)", "abc\ndef\n", NULL },
  { "(abc[^x]*def)", "abczzzdef\ndef\n", "abczzzdef" },
};

// With never_nl set, a match must never extend across a '\n'.
TEST(RE2, NeverNewline) {
  RE2::Options never_nl;
  never_nl.set_never_nl(true);

  for (const NeverTest& c : never_tests) {
    RE2 re(c.regexp, never_nl);

    if (c.match != NULL) {
      StringPiece got;
      EXPECT_TRUE(RE2::PartialMatch(c.text, re, &got));
      EXPECT_EQ(got, c.match);
    } else {
      EXPECT_FALSE(RE2::PartialMatch(c.text, re));
    }
  }
}
1347
1348 // Check that dot_nl option works.
TEST(RE2,DotNL)1349 TEST(RE2, DotNL) {
1350 RE2::Options opt;
1351 opt.set_dot_nl(true);
1352 EXPECT_TRUE(RE2::PartialMatch("\n", RE2(".", opt)));
1353 EXPECT_FALSE(RE2::PartialMatch("\n", RE2("(?-s).", opt)));
1354 opt.set_never_nl(true);
1355 EXPECT_FALSE(RE2::PartialMatch("\n", RE2(".", opt)));
1356 }
1357
1358 // Check that there are no capturing groups in "never capture" mode.
TEST(RE2,NeverCapture)1359 TEST(RE2, NeverCapture) {
1360 RE2::Options opt;
1361 opt.set_never_capture(true);
1362 RE2 re("(r)(e)", opt);
1363 EXPECT_EQ(0, re.NumberOfCapturingGroups());
1364 }
1365
1366 // Bitstate bug was looking at submatch[0] even if nsubmatch == 0.
1367 // Triggered by a failed DFA search falling back to Bitstate when
1368 // using Match with a NULL submatch set. Bitstate tried to read
1369 // the submatch[0] entry even if nsubmatch was 0.
TEST(RE2,BitstateCaptureBug)1370 TEST(RE2, BitstateCaptureBug) {
1371 RE2::Options opt;
1372 opt.set_max_mem(20000);
1373 RE2 re("(_________$)", opt);
1374 StringPiece s = "xxxxxxxxxxxxxxxxxxxxxxxxxx_________x";
1375 EXPECT_FALSE(re.Match(s, 0, s.size(), RE2::UNANCHORED, NULL, 0));
1376 }
1377
1378 // C++ version of bug 609710.
TEST(RE2,UnicodeClasses)1379 TEST(RE2, UnicodeClasses) {
1380 const std::string str = "ABCDEFGHI譚永鋒";
1381 std::string a, b, c;
1382
1383 EXPECT_TRUE(RE2::FullMatch("A", "\\p{L}"));
1384 EXPECT_TRUE(RE2::FullMatch("A", "\\p{Lu}"));
1385 EXPECT_FALSE(RE2::FullMatch("A", "\\p{Ll}"));
1386 EXPECT_FALSE(RE2::FullMatch("A", "\\P{L}"));
1387 EXPECT_FALSE(RE2::FullMatch("A", "\\P{Lu}"));
1388 EXPECT_TRUE(RE2::FullMatch("A", "\\P{Ll}"));
1389
1390 EXPECT_TRUE(RE2::FullMatch("譚", "\\p{L}"));
1391 EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Lu}"));
1392 EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Ll}"));
1393 EXPECT_FALSE(RE2::FullMatch("譚", "\\P{L}"));
1394 EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Lu}"));
1395 EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Ll}"));
1396
1397 EXPECT_TRUE(RE2::FullMatch("永", "\\p{L}"));
1398 EXPECT_FALSE(RE2::FullMatch("永", "\\p{Lu}"));
1399 EXPECT_FALSE(RE2::FullMatch("永", "\\p{Ll}"));
1400 EXPECT_FALSE(RE2::FullMatch("永", "\\P{L}"));
1401 EXPECT_TRUE(RE2::FullMatch("永", "\\P{Lu}"));
1402 EXPECT_TRUE(RE2::FullMatch("永", "\\P{Ll}"));
1403
1404 EXPECT_TRUE(RE2::FullMatch("鋒", "\\p{L}"));
1405 EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Lu}"));
1406 EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Ll}"));
1407 EXPECT_FALSE(RE2::FullMatch("鋒", "\\P{L}"));
1408 EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Lu}"));
1409 EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Ll}"));
1410
1411 EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?(.).*?(.)", &a, &b, &c));
1412 EXPECT_EQ("A", a);
1413 EXPECT_EQ("B", b);
1414 EXPECT_EQ("C", c);
1415
1416 EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{L}]).*?(.)", &a, &b, &c));
1417 EXPECT_EQ("A", a);
1418 EXPECT_EQ("B", b);
1419 EXPECT_EQ("C", c);
1420
1421 EXPECT_FALSE(RE2::PartialMatch(str, "\\P{L}"));
1422
1423 EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{Lu}]).*?(.)", &a, &b, &c));
1424 EXPECT_EQ("A", a);
1425 EXPECT_EQ("B", b);
1426 EXPECT_EQ("C", c);
1427
1428 EXPECT_FALSE(RE2::PartialMatch(str, "[^\\p{Lu}\\p{Lo}]"));
1429
1430 EXPECT_TRUE(RE2::PartialMatch(str, ".*(.).*?([\\p{Lu}\\p{Lo}]).*?(.)", &a, &b, &c));
1431 EXPECT_EQ("譚", a);
1432 EXPECT_EQ("永", b);
1433 EXPECT_EQ("鋒", c);
1434 }
1435
// LazyRE2 must work both with and without explicit options.
TEST(RE2, LazyRE2) {
  static LazyRE2 without_options = {"a"};
  static LazyRE2 with_latin1 = {"b", RE2::Latin1};

  // With no options given, the encoding is UTF-8.
  EXPECT_EQ("a", without_options->pattern());
  EXPECT_EQ(RE2::Options::EncodingUTF8,
            without_options->options().encoding());

  // The Latin1 option shows up in the resulting options.
  EXPECT_EQ("b", with_latin1->pattern());
  EXPECT_EQ(RE2::Options::EncodingLatin1,
            with_latin1->options().encoding());
}
1447
1448 // Bug reported by saito. 2009/02/17
TEST(RE2,NullVsEmptyString)1449 TEST(RE2, NullVsEmptyString) {
1450 RE2 re(".*");
1451 EXPECT_TRUE(re.ok());
1452
1453 StringPiece null;
1454 EXPECT_TRUE(RE2::FullMatch(null, re));
1455
1456 StringPiece empty("");
1457 EXPECT_TRUE(RE2::FullMatch(empty, re));
1458 }
1459
1460 // Similar to the previous test, check that the null string and the empty
1461 // string both match, but also that the null string can only provide null
1462 // submatches whereas the empty string can also provide empty submatches.
TEST(RE2,NullVsEmptyStringSubmatches)1463 TEST(RE2, NullVsEmptyStringSubmatches) {
1464 RE2 re("()|(foo)");
1465 EXPECT_TRUE(re.ok());
1466
1467 // matches[0] is overall match, [1] is (), [2] is (foo), [3] is nonexistent.
1468 StringPiece matches[4];
1469
1470 for (size_t i = 0; i < arraysize(matches); i++)
1471 matches[i] = "bar";
1472
1473 StringPiece null;
1474 EXPECT_TRUE(re.Match(null, 0, null.size(), RE2::UNANCHORED,
1475 matches, arraysize(matches)));
1476 for (size_t i = 0; i < arraysize(matches); i++) {
1477 EXPECT_TRUE(matches[i].data() == NULL); // always null
1478 EXPECT_TRUE(matches[i].empty());
1479 }
1480
1481 for (size_t i = 0; i < arraysize(matches); i++)
1482 matches[i] = "bar";
1483
1484 StringPiece empty("");
1485 EXPECT_TRUE(re.Match(empty, 0, empty.size(), RE2::UNANCHORED,
1486 matches, arraysize(matches)));
1487 EXPECT_TRUE(matches[0].data() != NULL); // empty, not null
1488 EXPECT_TRUE(matches[0].empty());
1489 EXPECT_TRUE(matches[1].data() != NULL); // empty, not null
1490 EXPECT_TRUE(matches[1].empty());
1491 EXPECT_TRUE(matches[2].data() == NULL);
1492 EXPECT_TRUE(matches[2].empty());
1493 EXPECT_TRUE(matches[3].data() == NULL);
1494 EXPECT_TRUE(matches[3].empty());
1495 }
1496
1497 // Issue 1816809
TEST(RE2,Bug1816809)1498 TEST(RE2, Bug1816809) {
1499 RE2 re("(((((llx((-3)|(4)))(;(llx((-3)|(4))))*))))");
1500 StringPiece piece("llx-3;llx4");
1501 std::string x;
1502 EXPECT_TRUE(RE2::Consume(&piece, re, &x));
1503 }
1504
1505 // Issue 3061120
TEST(RE2,Bug3061120)1506 TEST(RE2, Bug3061120) {
1507 RE2 re("(?i)\\W");
1508 EXPECT_FALSE(RE2::PartialMatch("x", re)); // always worked
1509 EXPECT_FALSE(RE2::PartialMatch("k", re)); // broke because of kelvin
1510 EXPECT_FALSE(RE2::PartialMatch("s", re)); // broke because of latin long s
1511 }
1512
// CapturingGroupNames() must map the index of every named group to its
// name, including a name that is used by more than one group.
TEST(RE2, CapturingGroupNames) {
  // Opening parentheses annotated with group IDs:
  //      12    3        45   6         7
  RE2 re("((abc)(?P<G2>)|((e+)(?P<G2>.*)(?P<G1>u+)))");
  EXPECT_TRUE(re.ok());

  // Only groups 3, 6 and 7 carry a name.
  const std::map<int, std::string> expected = {
    {3, "G2"},
    {6, "G2"},
    {7, "G1"},
  };
  const std::map<int, std::string>& actual = re.CapturingGroupNames();
  EXPECT_EQ(expected, actual);
}
1525
// Converting the parsed regexp back to a string must keep its anchors,
// both in POSIX mode and in the default mode.
TEST(RE2, RegexpToStringLossOfAnchor) {
  // Anchored at the beginning.
  RE2 begin_posix("^[a-c]at", RE2::POSIX);
  EXPECT_EQ(begin_posix.Regexp()->ToString(), "^[a-c]at");
  RE2 begin_default("^[a-c]at");
  EXPECT_EQ(begin_default.Regexp()->ToString(), "(?-m:^)[a-c]at");

  // Anchored at the end.
  RE2 end_posix("ca[t-z]$", RE2::POSIX);
  EXPECT_EQ(end_posix.Regexp()->ToString(), "ca[t-z]$");
  RE2 end_default("ca[t-z]$");
  EXPECT_EQ(end_default.Regexp()->ToString(), "ca[t-z](?-m:$)");
}
1532
1533 // Issue 10131674
TEST(RE2,Bug10131674)1534 TEST(RE2, Bug10131674) {
1535 // Some of these escapes describe values that do not fit in a byte.
1536 RE2 re("\\140\\440\\174\\271\\150\\656\\106\\201\\004\\332", RE2::Latin1);
1537 EXPECT_FALSE(re.ok());
1538 EXPECT_FALSE(RE2::FullMatch("hello world", re));
1539 }
1540
// Regression test for a stray write past the end of match_ in nfa.cc,
// which was caught by fuzzing + address sanitizer.
TEST(RE2, Bug18391750) {
  // The fuzzer input, used both as the pattern and as the text.
  const char fuzz[] =
      "\x28\x28\xfc\xfc\x08\x08"
      "\x26\x26\x28\xc2\x9b\xc5"
      "\xc5\xd4\x8f\x8f\x69\x69"
      "\xe7\x29\x7b\x37\x31\x31"
      "\x7d\xae\x7c\x7c\xf3\x29"
      "\xae\xae\x2e\x2a\x29";

  RE2::Options options;
  options.set_encoding(RE2::Options::EncodingLatin1);
  options.set_longest_match(true);
  options.set_dot_nl(true);
  options.set_case_sensitive(false);

  RE2 re(fuzz, options);
  ASSERT_TRUE(re.ok());

  // The match result is not checked; the search only has to run.
  RE2::PartialMatch(fuzz, re);
}
1560
// Regression test for a parser bug: an invalid (too large) rune was
// accepted, which made the compiler fail a DCHECK in the UTF-8
// character class code. The pattern must be rejected.
TEST(RE2, Bug18458852) {
  const char pattern[] =
      "\x28\x05\x05\x41\x41\x28"
      "\x24\x5b\x5e\xf5\x87\x87"
      "\x90\x29\x5d\x29\x29";

  RE2 re(pattern);
  ASSERT_FALSE(re.ok());
}
1573
// Regression test for a bug in BitState: case kFailInst failed the match
// entirely.
TEST(RE2, Bug18523943) {
  const char text[] = "\x29\x29\x24";
  const char pattern[] = "\x28\x0a\x2a\x2a\x29";

  RE2::Options options;
  options.set_log_errors(false);
  options.set_encoding(RE2::Options::EncodingLatin1);
  options.set_posix_syntax(true);
  options.set_longest_match(true);
  options.set_literal(false);
  options.set_never_nl(true);

  RE2 re(pattern, options);
  ASSERT_TRUE(re.ok());

  std::string captured;
  ASSERT_TRUE(RE2::PartialMatch(text, re, &captured));
}
1596
// Regression test for a parser bug: Unicode groups were accepted in
// Latin-1 mode, which made the compiler fail a DCHECK in prog.cc.
TEST(RE2, Bug21371806) {
  RE2::Options latin1;
  latin1.set_encoding(RE2::Options::EncodingLatin1);

  RE2 unicode_group("g\\p{Zl}]", latin1);
  ASSERT_TRUE(unicode_group.ok());
}
1607
// Regression test for a parser bug caused by factoring common prefixes
// out of alternations.
TEST(RE2, Bug26356109) {
  // This pattern used to be factored to "a\\C*?[bc]". The automaton
  // would then consume "ab" and stop (when unanchored), although
  // first-match semantics require it to consume all of "abc".
  RE2 re("a\\C*?c|a\\C*?b");
  ASSERT_TRUE(re.ok());

  std::string text = "abc";
  StringPiece got;

  // Unanchored search: the whole text has to be matched.
  ASSERT_TRUE(re.Match(text, 0, text.size(), RE2::UNANCHORED, &got, 1));
  ASSERT_EQ(got, text)
      << " (UNANCHORED) got m='" << got << "', want '" << text << "'";

  // Anchored at both ends: likewise.
  ASSERT_TRUE(re.Match(text, 0, text.size(), RE2::ANCHOR_BOTH, &got, 1));
  ASSERT_EQ(got, text)
      << " (ANCHOR_BOTH) got m='" << got << "', want '" << text << "'";
}
1626
// RE2::GlobalReplace used to advance by a single byte whenever the empty
// string was matched, clobbering any rune longer than one byte.
TEST(RE2, Issue104) {
  std::string text;

  // Single-byte characters.
  text = "bc";
  ASSERT_EQ(3, RE2::GlobalReplace(&text, "a*", "d"));
  ASSERT_EQ("dbdcd", text);

  // Non-ASCII characters: every rune must survive the replacement intact.
  text = "ąć";
  ASSERT_EQ(3, RE2::GlobalReplace(&text, "Ć*", "Ĉ"));
  ASSERT_EQ("ĈąĈćĈ", text);

  text = "人类";
  ASSERT_EQ(3, RE2::GlobalReplace(&text, "大*", "小"));
  ASSERT_EQ("小人小类小", text);
}
1643
// (?:|a)* used to match more text than (?:|a)+ did. Both are expected to
// match the empty string at the start of the text.
TEST(RE2, Issue310) {
  std::string text = "aaa";
  StringPiece got;

  RE2 star_re("(?:|a)*");
  ASSERT_TRUE(star_re.Match(text, 0, text.size(), RE2::UNANCHORED, &got, 1));
  ASSERT_EQ(got, "") << " got m='" << got << "', want ''";

  RE2 plus_re("(?:|a)+");
  ASSERT_TRUE(plus_re.Match(text, 0, text.size(), RE2::UNANCHORED, &got, 1));
  ASSERT_EQ(got, "") << " got m='" << got << "', want ''";
}
1658
1659 } // namespace re2
1660