xref: /aosp_15_r20/external/regex-re2/re2/testing/re2_test.cc (revision ccdc9c3e24c519bfa4832a66aa2e83a52c19f295)
1*ccdc9c3eSSadaf Ebrahimi // -*- coding: utf-8 -*-
2*ccdc9c3eSSadaf Ebrahimi // Copyright 2002-2009 The RE2 Authors.  All Rights Reserved.
3*ccdc9c3eSSadaf Ebrahimi // Use of this source code is governed by a BSD-style
4*ccdc9c3eSSadaf Ebrahimi // license that can be found in the LICENSE file.
5*ccdc9c3eSSadaf Ebrahimi 
6*ccdc9c3eSSadaf Ebrahimi // TODO: Test extractions for PartialMatch/Consume
7*ccdc9c3eSSadaf Ebrahimi 
8*ccdc9c3eSSadaf Ebrahimi #include <errno.h>
9*ccdc9c3eSSadaf Ebrahimi #include <stddef.h>
10*ccdc9c3eSSadaf Ebrahimi #include <stdint.h>
11*ccdc9c3eSSadaf Ebrahimi #include <string.h>
12*ccdc9c3eSSadaf Ebrahimi #include <map>
13*ccdc9c3eSSadaf Ebrahimi #include <string>
14*ccdc9c3eSSadaf Ebrahimi #include <utility>
15*ccdc9c3eSSadaf Ebrahimi #if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
16*ccdc9c3eSSadaf Ebrahimi #include <sys/mman.h>
17*ccdc9c3eSSadaf Ebrahimi #include <unistd.h>  /* for sysconf */
18*ccdc9c3eSSadaf Ebrahimi #endif
19*ccdc9c3eSSadaf Ebrahimi 
20*ccdc9c3eSSadaf Ebrahimi #include "util/test.h"
21*ccdc9c3eSSadaf Ebrahimi #include "util/logging.h"
22*ccdc9c3eSSadaf Ebrahimi #include "util/strutil.h"
23*ccdc9c3eSSadaf Ebrahimi #include "re2/re2.h"
24*ccdc9c3eSSadaf Ebrahimi #include "re2/regexp.h"
25*ccdc9c3eSSadaf Ebrahimi 
26*ccdc9c3eSSadaf Ebrahimi namespace re2 {
27*ccdc9c3eSSadaf Ebrahimi 
// Parses hexadecimal text into each built-in integer type, once through
// RE2::Hex on the bare digits and once through RE2::CRadix on the same
// digits carrying a "0x" prefix.
TEST(RE2, HexTests) {
  // `digits` is stringified to build the subject text and pasted after "0x"
  // to build the expected value.  Any integer suffix it carries is consumed
  // by the trailing [uUlL]* in both patterns.
#define CHECK_HEX_PARSE(int_type, digits) \
  do { \
    int_type parsed; \
    ASSERT_TRUE(RE2::FullMatch(#digits, "([0-9a-fA-F]+)[uUlL]*", \
                               RE2::Hex(&parsed))); \
    ASSERT_EQ(parsed, 0x##digits); \
    ASSERT_TRUE(RE2::FullMatch("0x" #digits, "([0-9a-fA-FxX]+)[uUlL]*", \
                               RE2::CRadix(&parsed))); \
    ASSERT_EQ(parsed, 0x##digits); \
  } while (0)

  CHECK_HEX_PARSE(short, 2bad);
  CHECK_HEX_PARSE(unsigned short, 2badU);
  CHECK_HEX_PARSE(int, dead);
  CHECK_HEX_PARSE(unsigned int, deadU);
  CHECK_HEX_PARSE(long, 7eadbeefL);
  CHECK_HEX_PARSE(unsigned long, deadbeefUL);
  CHECK_HEX_PARSE(long long, 12345678deadbeefLL);
  CHECK_HEX_PARSE(unsigned long long, cafebabedeadbeefULL);

#undef CHECK_HEX_PARSE
}
51*ccdc9c3eSSadaf Ebrahimi 
// Parses octal text into each built-in integer type, once through
// RE2::Octal on the bare digits and once through RE2::CRadix on the same
// digits carrying a leading "0".
TEST(RE2, OctalTests) {
  // `digits` is stringified to build the subject text and pasted after "0"
  // to build the expected octal literal.
#define CHECK_OCTAL_PARSE(int_type, digits) \
  do { \
    int_type parsed; \
    ASSERT_TRUE(RE2::FullMatch(#digits, "([0-7]+)[uUlL]*", \
                               RE2::Octal(&parsed))); \
    ASSERT_EQ(parsed, 0##digits); \
    ASSERT_TRUE(RE2::FullMatch("0" #digits, "([0-9a-fA-FxX]+)[uUlL]*", \
                               RE2::CRadix(&parsed))); \
    ASSERT_EQ(parsed, 0##digits); \
  } while (0)

  CHECK_OCTAL_PARSE(short, 77777);
  CHECK_OCTAL_PARSE(unsigned short, 177777U);
  CHECK_OCTAL_PARSE(int, 17777777777);
  CHECK_OCTAL_PARSE(unsigned int, 37777777777U);
  CHECK_OCTAL_PARSE(long, 17777777777L);
  CHECK_OCTAL_PARSE(unsigned long, 37777777777UL);
  CHECK_OCTAL_PARSE(long long, 777777777777777777777LL);
  CHECK_OCTAL_PARSE(unsigned long long, 1777777777777777777777ULL);

#undef CHECK_OCTAL_PARSE
}
74*ccdc9c3eSSadaf Ebrahimi 
// Parses decimal text into each built-in integer type, once through the
// plain pointer argument and once through RE2::CRadix.
TEST(RE2, DecimalTests) {
  // `literal` is stringified to build the subject text and used verbatim as
  // the expected value.  Any integer suffix is consumed by [uUlL]*.
#define CHECK_DECIMAL_PARSE(int_type, literal) \
  do { \
    int_type parsed; \
    ASSERT_TRUE(RE2::FullMatch(#literal, "(-?[0-9]+)[uUlL]*", &parsed)); \
    ASSERT_EQ(parsed, literal); \
    ASSERT_TRUE(RE2::FullMatch(#literal, "(-?[0-9a-fA-FxX]+)[uUlL]*", \
                               RE2::CRadix(&parsed))); \
    ASSERT_EQ(parsed, literal); \
  } while (0)

  CHECK_DECIMAL_PARSE(short, -1);
  CHECK_DECIMAL_PARSE(unsigned short, 9999);
  CHECK_DECIMAL_PARSE(int, -1000);
  CHECK_DECIMAL_PARSE(unsigned int, 12345U);
  CHECK_DECIMAL_PARSE(long, -10000000L);
  CHECK_DECIMAL_PARSE(unsigned long, 3083324652U);
  CHECK_DECIMAL_PARSE(long long, -100000000000000LL);
  CHECK_DECIMAL_PARSE(unsigned long long, 1234567890987654321ULL);

#undef CHECK_DECIMAL_PARSE
}
97*ccdc9c3eSSadaf Ebrahimi 
TEST(RE2,Replace)98*ccdc9c3eSSadaf Ebrahimi TEST(RE2, Replace) {
99*ccdc9c3eSSadaf Ebrahimi   struct ReplaceTest {
100*ccdc9c3eSSadaf Ebrahimi     const char *regexp;
101*ccdc9c3eSSadaf Ebrahimi     const char *rewrite;
102*ccdc9c3eSSadaf Ebrahimi     const char *original;
103*ccdc9c3eSSadaf Ebrahimi     const char *single;
104*ccdc9c3eSSadaf Ebrahimi     const char *global;
105*ccdc9c3eSSadaf Ebrahimi     int        greplace_count;
106*ccdc9c3eSSadaf Ebrahimi   };
107*ccdc9c3eSSadaf Ebrahimi   static const ReplaceTest tests[] = {
108*ccdc9c3eSSadaf Ebrahimi     { "(qu|[b-df-hj-np-tv-z]*)([a-z]+)",
109*ccdc9c3eSSadaf Ebrahimi       "\\2\\1ay",
110*ccdc9c3eSSadaf Ebrahimi       "the quick brown fox jumps over the lazy dogs.",
111*ccdc9c3eSSadaf Ebrahimi       "ethay quick brown fox jumps over the lazy dogs.",
112*ccdc9c3eSSadaf Ebrahimi       "ethay ickquay ownbray oxfay umpsjay overay ethay azylay ogsday.",
113*ccdc9c3eSSadaf Ebrahimi       9 },
114*ccdc9c3eSSadaf Ebrahimi     { "\\w+",
115*ccdc9c3eSSadaf Ebrahimi       "\\0-NOSPAM",
116*ccdc9c3eSSadaf Ebrahimi       "[email protected]",
117*ccdc9c3eSSadaf Ebrahimi       "[email protected]",
118*ccdc9c3eSSadaf Ebrahimi       "[email protected]",
119*ccdc9c3eSSadaf Ebrahimi       4 },
120*ccdc9c3eSSadaf Ebrahimi     { "^",
121*ccdc9c3eSSadaf Ebrahimi       "(START)",
122*ccdc9c3eSSadaf Ebrahimi       "foo",
123*ccdc9c3eSSadaf Ebrahimi       "(START)foo",
124*ccdc9c3eSSadaf Ebrahimi       "(START)foo",
125*ccdc9c3eSSadaf Ebrahimi       1 },
126*ccdc9c3eSSadaf Ebrahimi     { "^",
127*ccdc9c3eSSadaf Ebrahimi       "(START)",
128*ccdc9c3eSSadaf Ebrahimi       "",
129*ccdc9c3eSSadaf Ebrahimi       "(START)",
130*ccdc9c3eSSadaf Ebrahimi       "(START)",
131*ccdc9c3eSSadaf Ebrahimi       1 },
132*ccdc9c3eSSadaf Ebrahimi     { "$",
133*ccdc9c3eSSadaf Ebrahimi       "(END)",
134*ccdc9c3eSSadaf Ebrahimi       "",
135*ccdc9c3eSSadaf Ebrahimi       "(END)",
136*ccdc9c3eSSadaf Ebrahimi       "(END)",
137*ccdc9c3eSSadaf Ebrahimi       1 },
138*ccdc9c3eSSadaf Ebrahimi     { "b",
139*ccdc9c3eSSadaf Ebrahimi       "bb",
140*ccdc9c3eSSadaf Ebrahimi       "ababababab",
141*ccdc9c3eSSadaf Ebrahimi       "abbabababab",
142*ccdc9c3eSSadaf Ebrahimi       "abbabbabbabbabb",
143*ccdc9c3eSSadaf Ebrahimi       5 },
144*ccdc9c3eSSadaf Ebrahimi     { "b",
145*ccdc9c3eSSadaf Ebrahimi       "bb",
146*ccdc9c3eSSadaf Ebrahimi       "bbbbbb",
147*ccdc9c3eSSadaf Ebrahimi       "bbbbbbb",
148*ccdc9c3eSSadaf Ebrahimi       "bbbbbbbbbbbb",
149*ccdc9c3eSSadaf Ebrahimi       6 },
150*ccdc9c3eSSadaf Ebrahimi     { "b+",
151*ccdc9c3eSSadaf Ebrahimi       "bb",
152*ccdc9c3eSSadaf Ebrahimi       "bbbbbb",
153*ccdc9c3eSSadaf Ebrahimi       "bb",
154*ccdc9c3eSSadaf Ebrahimi       "bb",
155*ccdc9c3eSSadaf Ebrahimi       1 },
156*ccdc9c3eSSadaf Ebrahimi     { "b*",
157*ccdc9c3eSSadaf Ebrahimi       "bb",
158*ccdc9c3eSSadaf Ebrahimi       "bbbbbb",
159*ccdc9c3eSSadaf Ebrahimi       "bb",
160*ccdc9c3eSSadaf Ebrahimi       "bb",
161*ccdc9c3eSSadaf Ebrahimi       1 },
162*ccdc9c3eSSadaf Ebrahimi     { "b*",
163*ccdc9c3eSSadaf Ebrahimi       "bb",
164*ccdc9c3eSSadaf Ebrahimi       "aaaaa",
165*ccdc9c3eSSadaf Ebrahimi       "bbaaaaa",
166*ccdc9c3eSSadaf Ebrahimi       "bbabbabbabbabbabb",
167*ccdc9c3eSSadaf Ebrahimi       6 },
168*ccdc9c3eSSadaf Ebrahimi     // Check newline handling
169*ccdc9c3eSSadaf Ebrahimi     { "a.*a",
170*ccdc9c3eSSadaf Ebrahimi       "(\\0)",
171*ccdc9c3eSSadaf Ebrahimi       "aba\naba",
172*ccdc9c3eSSadaf Ebrahimi       "(aba)\naba",
173*ccdc9c3eSSadaf Ebrahimi       "(aba)\n(aba)",
174*ccdc9c3eSSadaf Ebrahimi       2 },
175*ccdc9c3eSSadaf Ebrahimi     { "", NULL, NULL, NULL, NULL, 0 }
176*ccdc9c3eSSadaf Ebrahimi   };
177*ccdc9c3eSSadaf Ebrahimi 
178*ccdc9c3eSSadaf Ebrahimi   for (const ReplaceTest* t = tests; t->original != NULL; t++) {
179*ccdc9c3eSSadaf Ebrahimi     string one(t->original);
180*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::Replace(&one, t->regexp, t->rewrite));
181*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(one, t->single);
182*ccdc9c3eSSadaf Ebrahimi     string all(t->original);
183*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(RE2::GlobalReplace(&all, t->regexp, t->rewrite), t->greplace_count)
184*ccdc9c3eSSadaf Ebrahimi       << "Got: " << all;
185*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(all, t->global);
186*ccdc9c3eSSadaf Ebrahimi   }
187*ccdc9c3eSSadaf Ebrahimi }
188*ccdc9c3eSSadaf Ebrahimi 
TestCheckRewriteString(const char * regexp,const char * rewrite,bool expect_ok)189*ccdc9c3eSSadaf Ebrahimi static void TestCheckRewriteString(const char* regexp, const char* rewrite,
190*ccdc9c3eSSadaf Ebrahimi                               bool expect_ok) {
191*ccdc9c3eSSadaf Ebrahimi   string error;
192*ccdc9c3eSSadaf Ebrahimi   RE2 exp(regexp);
193*ccdc9c3eSSadaf Ebrahimi   bool actual_ok = exp.CheckRewriteString(rewrite, &error);
194*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ(expect_ok, actual_ok) << " for " << rewrite << " error: " << error;
195*ccdc9c3eSSadaf Ebrahimi }
196*ccdc9c3eSSadaf Ebrahimi 
// Runs TestCheckRewriteString over patterns with zero, one and two capturing
// groups, pairing each with rewrite strings that should be accepted or
// rejected.
TEST(CheckRewriteString, all) {
  struct RewriteCase {
    const char* regexp;
    const char* rewrite;
    bool valid;
  };
  static const RewriteCase kCases[] = {
    // No capturing groups.
    { "abc", "foo", true },
    { "abc", "foo\\", false },
    { "abc", "foo\\0bar", true },

    // One capturing group.
    { "a(b)c", "foo", true },
    { "a(b)c", "foo\\0bar", true },
    { "a(b)c", "foo\\1bar", true },
    { "a(b)c", "foo\\2bar", false },
    { "a(b)c", "f\\\\2o\\1o", true },

    // Two capturing groups.
    { "a(b)(c)", "foo\\12", true },
    { "a(b)(c)", "f\\2o\\1o", true },
    { "a(b)(c)", "f\\oo\\1", false },
  };

  const size_t case_count = sizeof(kCases) / sizeof(kCases[0]);
  for (size_t idx = 0; idx < case_count; ++idx) {
    const RewriteCase& entry = kCases[idx];
    TestCheckRewriteString(entry.regexp, entry.rewrite, entry.valid);
  }
}
212*ccdc9c3eSSadaf Ebrahimi 
// Checks that RE2::Extract writes the rewritten match into its output
// string, and leaves that string alone when the pattern does not match.
TEST(RE2, Extract) {
  string s;

  // The subject must contain '@' so that group 1 is the user name and
  // group 2 is the host name up to the first dot.
  ASSERT_TRUE(RE2::Extract("boris@kremvax.ru", "(.*)@([^.]*)", "\\2!\\1", &s));
  ASSERT_EQ(s, "kremvax!boris");

  ASSERT_TRUE(RE2::Extract("foo", ".*", "'\\0'", &s));
  ASSERT_EQ(s, "'foo'");
  // check that false match doesn't overwrite
  ASSERT_FALSE(RE2::Extract("baz", "bar", "'\\0'", &s));
  ASSERT_EQ(s, "'foo'");
}
225*ccdc9c3eSSadaf Ebrahimi 
// Pulls words off the front of a StringPiece with RE2::Consume until the
// remaining text no longer starts with a word.
TEST(RE2, Consume) {
  // Matches one word, possibly preceded by whitespace.
  RE2 word_re("\\s*(\\w+)");

  string text("   aaa b!@#$@#$cccc");
  StringPiece remaining(text);
  string captured;

  ASSERT_TRUE(RE2::Consume(&remaining, word_re, &captured));
  ASSERT_EQ(captured, "aaa") << " input: " << remaining;

  ASSERT_TRUE(RE2::Consume(&remaining, word_re, &captured));
  ASSERT_EQ(captured, "b") << " input: " << remaining;

  // After "b" the text continues with punctuation, so the third attempt
  // is expected to fail.
  ASSERT_FALSE(RE2::Consume(&remaining, word_re, &captured))
      << " input: " << remaining;
}
239*ccdc9c3eSSadaf Ebrahimi 
// Drives RE2::ConsumeN with zero, one and two output arguments over the
// same advancing input.
TEST(RE2, ConsumeN) {
  const string text(" one two three 4");
  StringPiece remaining(text);

  // `arg_ptrs` is the pointer array handed to ConsumeN; the slots it points
  // at are filled in just before each call that needs them.
  RE2::Arg arg_slots[2];
  const RE2::Arg* const arg_ptrs[2] = { &arg_slots[0], &arg_slots[1] };

  // No output arguments.
  EXPECT_TRUE(RE2::ConsumeN(&remaining, "\\s*(\\w+)", arg_ptrs, 0));  // Skips "one".

  // One output argument.
  string captured_word;
  arg_slots[0] = &captured_word;
  EXPECT_TRUE(RE2::ConsumeN(&remaining, "\\s*(\\w+)", arg_ptrs, 1));
  EXPECT_EQ("two", captured_word);

  // Two output arguments of different types.
  int captured_number;
  arg_slots[1] = &captured_number;
  EXPECT_TRUE(RE2::ConsumeN(&remaining, "\\s*(\\w+)\\s*(\\d+)", arg_ptrs, 2));
  EXPECT_EQ("three", captured_word);
  EXPECT_EQ(4, captured_number);
}
263*ccdc9c3eSSadaf Ebrahimi 
// Pulls successive words out of a StringPiece with RE2::FindAndConsume,
// then checks the overload that takes no submatch arguments.
TEST(RE2, FindAndConsume) {
  RE2 word_re("(\\w+)");  // matches a word

  string text("   aaa b!@#$@#$cccc");
  StringPiece remaining(text);
  string captured;

  ASSERT_TRUE(RE2::FindAndConsume(&remaining, word_re, &captured));
  ASSERT_EQ(captured, "aaa");

  ASSERT_TRUE(RE2::FindAndConsume(&remaining, word_re, &captured));
  ASSERT_EQ(captured, "b");

  ASSERT_TRUE(RE2::FindAndConsume(&remaining, word_re, &captured));
  ASSERT_EQ(captured, "cccc");

  // Nothing is left to find.
  ASSERT_FALSE(RE2::FindAndConsume(&remaining, word_re, &captured));

  // FindAndConsume must also work when no submatches are requested.
  // An earlier version used uninitialized data for the length to consume.
  remaining = "aaa";
  ASSERT_TRUE(RE2::FindAndConsume(&remaining, "aaa"));
  ASSERT_EQ(remaining, "");
}
286*ccdc9c3eSSadaf Ebrahimi 
// Drives RE2::FindAndConsumeN with zero, one and two output arguments over
// the same advancing input.
TEST(RE2, FindAndConsumeN) {
  const string text(" one two three 4");
  StringPiece remaining(text);

  // `arg_ptrs` is the pointer array handed to FindAndConsumeN; the slots it
  // points at are filled in just before each call that needs them.
  RE2::Arg arg_slots[2];
  const RE2::Arg* const arg_ptrs[2] = { &arg_slots[0], &arg_slots[1] };

  // No output arguments.
  EXPECT_TRUE(RE2::FindAndConsumeN(&remaining, "(\\w+)", arg_ptrs, 0));  // Skips "one".

  // One output argument.
  string captured_word;
  arg_slots[0] = &captured_word;
  EXPECT_TRUE(RE2::FindAndConsumeN(&remaining, "(\\w+)", arg_ptrs, 1));
  EXPECT_EQ("two", captured_word);

  // Two output arguments of different types.
  int captured_number;
  arg_slots[1] = &captured_number;
  EXPECT_TRUE(RE2::FindAndConsumeN(&remaining, "(\\w+)\\s*(\\d+)", arg_ptrs, 2));
  EXPECT_EQ("three", captured_word);
  EXPECT_EQ(4, captured_number);
}
310*ccdc9c3eSSadaf Ebrahimi 
// With an alternation of capturing groups, only the group belonging to the
// branch that matched carries text; the other outputs come back empty.
TEST(RE2, MatchNumberPeculiarity) {
  RE2 alternation("(foo)|(bar)|(baz)");
  string first;
  string second;
  string third;

  struct BranchCase {
    const char* text;
    const char* want_first;
    const char* want_second;
    const char* want_third;
  };
  static const BranchCase kBranches[] = {
    { "foo", "foo", "", "" },
    { "bar", "", "bar", "" },
    { "baz", "", "", "baz" },
  };

  const size_t branch_count = sizeof(kBranches) / sizeof(kBranches[0]);
  for (size_t idx = 0; idx < branch_count; ++idx) {
    const BranchCase& branch = kBranches[idx];
    ASSERT_TRUE(
        RE2::PartialMatch(branch.text, alternation, &first, &second, &third));
    ASSERT_EQ(first, branch.want_first);
    ASSERT_EQ(second, branch.want_second);
    ASSERT_EQ(third, branch.want_third);
  }

  // A bare prefix of one branch is not a match.
  ASSERT_FALSE(RE2::PartialMatch("f", alternation, &first, &second, &third));

  // The match goes through the branch without a group, so the output for
  // the only group is empty.
  string unmatched_group;
  ASSERT_TRUE(RE2::FullMatch("hello", "(foo)|hello", &unmatched_group));
  ASSERT_EQ(unmatched_group, "");
}
335*ccdc9c3eSSadaf Ebrahimi 
// Exercises the low-level RE2::Match entry point with a StringPiece submatch
// array, then repeats the extraction through RE2::PartialMatch.
TEST(RE2, Match) {
  // Outer group: host:port; inner groups: host, then port.
  RE2 host_port_re("((\\w+):([0-9]+))");
  StringPiece submatch[4];

  // Text without a host:port pair must not match.
  StringPiece text = "zyzzyva";
  ASSERT_FALSE(host_port_re.Match(text, 0, text.size(), RE2::UNANCHORED,
                                  submatch, arraysize(submatch)));

  // Text containing a pair matches; slot 0 is the whole match and slots
  // 1..3 are the capturing groups in order.
  text = "a chrisr:9000 here";
  ASSERT_TRUE(host_port_re.Match(text, 0, text.size(), RE2::UNANCHORED,
                                 submatch, arraysize(submatch)));
  ASSERT_EQ(submatch[0], "chrisr:9000");
  ASSERT_EQ(submatch[1], "chrisr:9000");
  ASSERT_EQ(submatch[2], "chrisr");
  ASSERT_EQ(submatch[3], "9000");

  // Same extraction via PartialMatch, with the port converted to int.
  string whole;
  string hostname;
  int port_number;
  ASSERT_TRUE(RE2::PartialMatch("a chrisr:9000 here", host_port_re, &whole,
                                &hostname, &port_number));
  ASSERT_EQ(whole, "chrisr:9000");
  ASSERT_EQ(hostname, "chrisr");
  ASSERT_EQ(port_number, 9000);
}
361*ccdc9c3eSSadaf Ebrahimi 
TestRecursion(int size,const char * pattern)362*ccdc9c3eSSadaf Ebrahimi static void TestRecursion(int size, const char* pattern) {
363*ccdc9c3eSSadaf Ebrahimi   // Fill up a string repeating the pattern given
364*ccdc9c3eSSadaf Ebrahimi   string domain;
365*ccdc9c3eSSadaf Ebrahimi   domain.resize(size);
366*ccdc9c3eSSadaf Ebrahimi   size_t patlen = strlen(pattern);
367*ccdc9c3eSSadaf Ebrahimi   for (int i = 0; i < size; i++) {
368*ccdc9c3eSSadaf Ebrahimi     domain[i] = pattern[i % patlen];
369*ccdc9c3eSSadaf Ebrahimi   }
370*ccdc9c3eSSadaf Ebrahimi   // Just make sure it doesn't crash due to too much recursion.
371*ccdc9c3eSSadaf Ebrahimi   RE2 re("([a-zA-Z0-9]|-)+(\\.([a-zA-Z0-9]|-)+)*(\\.)?", RE2::Quiet);
372*ccdc9c3eSSadaf Ebrahimi   RE2::FullMatch(domain, re);
373*ccdc9c3eSSadaf Ebrahimi }
374*ccdc9c3eSSadaf Ebrahimi 
// A meta-quoted string, interpreted as a pattern, should always match
// the original unquoted string.
TestQuoteMeta(const string & unquoted,const RE2::Options & options=RE2::DefaultOptions)377*ccdc9c3eSSadaf Ebrahimi static void TestQuoteMeta(const string& unquoted,
378*ccdc9c3eSSadaf Ebrahimi                           const RE2::Options& options = RE2::DefaultOptions) {
379*ccdc9c3eSSadaf Ebrahimi   string quoted = RE2::QuoteMeta(unquoted);
380*ccdc9c3eSSadaf Ebrahimi   RE2 re(quoted, options);
381*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::FullMatch(unquoted, re))
382*ccdc9c3eSSadaf Ebrahimi       << "Unquoted='" << unquoted << "', quoted='" << quoted << "'.";
383*ccdc9c3eSSadaf Ebrahimi }
384*ccdc9c3eSSadaf Ebrahimi 
// A meta-quoted string, interpreted as a pattern, should never match
// a string that differs from the original unquoted string.
NegativeTestQuoteMeta(const string & unquoted,const string & should_not_match,const RE2::Options & options=RE2::DefaultOptions)387*ccdc9c3eSSadaf Ebrahimi static void NegativeTestQuoteMeta(
388*ccdc9c3eSSadaf Ebrahimi     const string& unquoted, const string& should_not_match,
389*ccdc9c3eSSadaf Ebrahimi     const RE2::Options& options = RE2::DefaultOptions) {
390*ccdc9c3eSSadaf Ebrahimi   string quoted = RE2::QuoteMeta(unquoted);
391*ccdc9c3eSSadaf Ebrahimi   RE2 re(quoted, options);
392*ccdc9c3eSSadaf Ebrahimi   EXPECT_FALSE(RE2::FullMatch(should_not_match, re))
393*ccdc9c3eSSadaf Ebrahimi       << "Unquoted='" << unquoted << "', quoted='" << quoted << "'.";
394*ccdc9c3eSSadaf Ebrahimi }
395*ccdc9c3eSSadaf Ebrahimi 
// Tests that quoted meta characters match their original strings,
// and that a few things that shouldn't match indeed do not.
// Each literal below, once meta-quoted, must match itself even though it
// contains characters that are special in regexp syntax.
TEST(QuoteMeta, Simple) {
  static const char* const kLiterals[] = {
    "foo",
    "foo.bar",
    "foo\\.bar",
    "[1-9]",
    "1.5-2.0?",
    "\\d",
    "Who doesn't like ice cream?",
    "((a|b)c?d*e+[f-h]i)",
    "((?!)xxx).*yyy",
    "([",
  };

  const size_t literal_count = sizeof(kLiterals) / sizeof(kLiterals[0]);
  for (size_t idx = 0; idx < literal_count; ++idx) {
    TestQuoteMeta(kLiterals[idx]);
  }
}
// Each quoted literal must reject the paired text, which is what the
// literal would match if its special characters were left unescaped, or
// simply unrelated text.
TEST(QuoteMeta, SimpleNegative) {
  struct RejectCase {
    const char* literal;
    const char* rejected;
  };
  static const RejectCase kCases[] = {
    { "foo", "bar" },
    { "...", "bar" },
    { "\\.", "." },
    { "\\.", ".." },
    { "(a)", "a" },
    { "(a|b)", "a" },
    { "(a|b)", "(a)" },
    { "(a|b)", "a|b" },
    { "[0-9]", "0" },
    { "[0-9]", "0-9" },
    { "[0-9]", "[9]" },
    { "((?!)xxx)", "xxx" },
  };

  const size_t case_count = sizeof(kCases) / sizeof(kCases[0]);
  for (size_t idx = 0; idx < case_count; ++idx) {
    NegativeTestQuoteMeta(kCases[idx].literal, kCases[idx].rejected);
  }
}
424*ccdc9c3eSSadaf Ebrahimi 
// Quoting must round-trip a single non-ASCII byte when the pattern is
// compiled with the Latin-1 option.
TEST(QuoteMeta, Latin1) {
  // Byte 0xB2 is the superscript-two character in Latin-1.
  const string three_squared("3\xb2 = 9");
  TestQuoteMeta(three_squared, RE2::Latin1);
}
428*ccdc9c3eSSadaf Ebrahimi 
// Quoting must round-trip multi-byte UTF-8 sequences, and must not match
// text in which the individual bytes of a sequence have been backslashed.
TEST(QuoteMeta, UTF8) {
  static const char* const kSubjects[] = {
    "Plácido Domingo",
    "xyz",                 // No fancy utf8.
    "\xc2\xb0",            // 2-byte utf8 -- a degree symbol.
    "27\xc2\xb0 degrees",  // As a middle character.
    "\xe2\x80\xb3",        // 3-byte utf8 -- a double prime.
    "\xf0\x9d\x85\x9f",    // 4-byte utf8 -- a music note.
    "27\xc2\xb0",          // Interpreted as Latin-1, this should
                           // still work.
  };

  const size_t subject_count = sizeof(kSubjects) / sizeof(kSubjects[0]);
  for (size_t idx = 0; idx < subject_count; ++idx) {
    TestQuoteMeta(kSubjects[idx]);
  }

  // The rejected text is the same degree symbol (2-byte utf8) with a
  // literal backslash in front of each of its bytes.
  NegativeTestQuoteMeta("27\xc2\xb0",
                        "27\\\xc2\\\xb0");
}
441*ccdc9c3eSSadaf Ebrahimi 
// Quoting must cope with embedded NUL bytes.
TEST(QuoteMeta, HasNull) {
  // A string holding exactly one NUL character.
  string subject(1, '\0');
  TestQuoteMeta(subject);
  NegativeTestQuoteMeta(subject, "");

  // NUL followed by '1' must not be read as the octal escape '\01'.
  subject.push_back('1');
  TestQuoteMeta(subject);
  NegativeTestQuoteMeta(subject, "\1");
}
455*ccdc9c3eSSadaf Ebrahimi 
// Forward and reverse program sizes must be positive and must grow as the
// pattern gets more complex.
TEST(ProgramSize, BigProgram) {
  RE2 re_simple("simple regexp");
  RE2 re_medium("medium.*regexp");
  RE2 re_complex("complex.{1,128}regexp");

  // Ordered from the smallest expected program to the largest.
  const RE2* const by_complexity[] = { &re_simple, &re_medium, &re_complex };
  const size_t re_count = sizeof(by_complexity) / sizeof(by_complexity[0]);

  ASSERT_GT(by_complexity[0]->ProgramSize(), 0);
  for (size_t idx = 1; idx < re_count; ++idx) {
    ASSERT_GT(by_complexity[idx]->ProgramSize(),
              by_complexity[idx - 1]->ProgramSize());
  }

  ASSERT_GT(by_complexity[0]->ReverseProgramSize(), 0);
  for (size_t idx = 1; idx < re_count; ++idx) {
    ASSERT_GT(by_complexity[idx]->ReverseProgramSize(),
              by_complexity[idx - 1]->ReverseProgramSize());
  }
}
469*ccdc9c3eSSadaf Ebrahimi 
// For nested-repetition patterns of growing size, checks the value returned
// by ProgramFanout / ReverseProgramFanout (the largest non-empty histogram
// bucket) and the number of elements recorded in that bucket.
TEST(ProgramFanout, BigProgram) {
  RE2 re1("(?:(?:(?:(?:(?:.)?){1})*)+)");
  RE2 re10("(?:(?:(?:(?:(?:.)?){10})*)+)");
  RE2 re100("(?:(?:(?:(?:(?:.)?){100})*)+)");
  RE2 re1000("(?:(?:(?:(?:(?:.)?){1000})*)+)");

  std::map<int, int> histogram;

  struct FanoutCase {
    const RE2* re;
    int top_bucket;    // largest non-empty bucket
    int bucket_count;  // number of elements in that bucket
  };

  const FanoutCase forward_cases[] = {
    { &re1, 3, 1 },
    { &re10, 7, 10 },
    { &re100, 10, 100 },
    { &re1000, 13, 1000 },
  };
  const size_t forward_count = sizeof(forward_cases) / sizeof(forward_cases[0]);
  for (size_t idx = 0; idx < forward_count; ++idx) {
    const FanoutCase& fc = forward_cases[idx];
    ASSERT_EQ(fc.top_bucket, fc.re->ProgramFanout(&histogram));
    ASSERT_EQ(fc.bucket_count, histogram[fc.top_bucket]);
  }

  // The first reverse case (bucket 2 holding 3 elements) differs from the
  // others due to how reverse `.' works.
  const FanoutCase reverse_cases[] = {
    { &re1, 2, 3 },
    { &re10, 5, 10 },
    { &re100, 9, 100 },
    { &re1000, 12, 1000 },
  };
  const size_t reverse_count = sizeof(reverse_cases) / sizeof(reverse_cases[0]);
  for (size_t idx = 0; idx < reverse_count; ++idx) {
    const FanoutCase& rc = reverse_cases[idx];
    ASSERT_EQ(rc.top_bucket, rc.re->ReverseProgramFanout(&histogram));
    ASSERT_EQ(rc.bucket_count, histogram[rc.top_bucket]);
  }
}
511*ccdc9c3eSSadaf Ebrahimi 
// Issue 956519: handling empty character sets was
// causing NULL dereference.  This tests a few empty character sets.
// (The way to get an empty character set is to negate a full one.)
TEST(EmptyCharset, Fuzz) {
  // Each class negates a pair of complementary sets, so it contains no
  // characters at all.
  static const char *kEmptyClasses[] = {
    "[^\\S\\s]",
    "[^\\S[:space:]]",
    "[^\\D\\d]",
    "[^\\D[:digit:]]"
  };

  // None of them may match anywhere in a non-empty subject.
  for (int idx = 0; idx < arraysize(kEmptyClasses); idx++) {
    RE2 empty_class_re(kEmptyClasses[idx]);
    ASSERT_FALSE(
        empty_class_re.Match("abc", 0, 3, RE2::UNANCHORED, NULL, 0));
  }
}
525*ccdc9c3eSSadaf Ebrahimi 
// Bitstate assumes that kInstFail instructions in
// alternations or capture groups have been "compiled away".
TEST(EmptyCharset,BitstateAssumptions)528*ccdc9c3eSSadaf Ebrahimi TEST(EmptyCharset, BitstateAssumptions) {
529*ccdc9c3eSSadaf Ebrahimi   // Captures trigger use of Bitstate.
530*ccdc9c3eSSadaf Ebrahimi   static const char *nop_empties[] = {
531*ccdc9c3eSSadaf Ebrahimi     "((((()))))" "[^\\S\\s]?",
532*ccdc9c3eSSadaf Ebrahimi     "((((()))))" "([^\\S\\s])?",
533*ccdc9c3eSSadaf Ebrahimi     "((((()))))" "([^\\S\\s]|[^\\S\\s])?",
534*ccdc9c3eSSadaf Ebrahimi     "((((()))))" "(([^\\S\\s]|[^\\S\\s])|)"
535*ccdc9c3eSSadaf Ebrahimi   };
536*ccdc9c3eSSadaf Ebrahimi   StringPiece group[6];
537*ccdc9c3eSSadaf Ebrahimi   for (int i = 0; i < arraysize(nop_empties); i++)
538*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2(nop_empties[i]).Match("", 0, 0, RE2::UNANCHORED, group, 6));
539*ccdc9c3eSSadaf Ebrahimi }
540*ccdc9c3eSSadaf Ebrahimi 
541*ccdc9c3eSSadaf Ebrahimi // Test that named groups work correctly.
TEST(Capture,NamedGroups)542*ccdc9c3eSSadaf Ebrahimi TEST(Capture, NamedGroups) {
543*ccdc9c3eSSadaf Ebrahimi   {
544*ccdc9c3eSSadaf Ebrahimi     RE2 re("(hello world)");
545*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(re.NumberOfCapturingGroups(), 1);
546*ccdc9c3eSSadaf Ebrahimi     const std::map<string, int>& m = re.NamedCapturingGroups();
547*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(m.size(), 0);
548*ccdc9c3eSSadaf Ebrahimi   }
549*ccdc9c3eSSadaf Ebrahimi 
550*ccdc9c3eSSadaf Ebrahimi   {
551*ccdc9c3eSSadaf Ebrahimi     RE2 re("(?P<A>expr(?P<B>expr)(?P<C>expr))((expr)(?P<D>expr))");
552*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(re.NumberOfCapturingGroups(), 6);
553*ccdc9c3eSSadaf Ebrahimi     const std::map<string, int>& m = re.NamedCapturingGroups();
554*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(m.size(), 4);
555*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(m.find("A")->second, 1);
556*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(m.find("B")->second, 2);
557*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(m.find("C")->second, 3);
558*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(m.find("D")->second, 6);  // $4 and $5 are anonymous
559*ccdc9c3eSSadaf Ebrahimi   }
560*ccdc9c3eSSadaf Ebrahimi }
561*ccdc9c3eSSadaf Ebrahimi 
// Matches a pattern with two named groups and checks that the indices
// reported by NamedCapturingGroups() line up with the captured arguments.
TEST(RE2, CapturedGroupTest) {
  RE2 re("directions from (?P<S>.*) to (?P<D>.*)");
  int num_groups = re.NumberOfCapturingGroups();
  // Fatal on mismatch: num_groups is passed to FullMatchN below as the
  // argument count and must not exceed the four slots in `matches`.
  ASSERT_EQ(2, num_groups);
  string args[4];
  RE2::Arg arg0(&args[0]);
  RE2::Arg arg1(&args[1]);
  RE2::Arg arg2(&args[2]);
  RE2::Arg arg3(&args[3]);

  const RE2::Arg* const matches[4] = {&arg0, &arg1, &arg2, &arg3};
  EXPECT_TRUE(RE2::FullMatchN("directions from mountain view to san jose",
                              re, matches, num_groups));
  const std::map<string, int>& named_groups = re.NamedCapturingGroups();
  std::map<string, int>::const_iterator source = named_groups.find("S");
  std::map<string, int>::const_iterator destination = named_groups.find("D");
  // Fatal on failure: the iterators are dereferenced right below, and
  // dereferencing end() is undefined behavior.
  ASSERT_TRUE(source != named_groups.end());
  ASSERT_TRUE(destination != named_groups.end());

  // The named group index is 1-based.
  int source_group_index = source->second;
  int destination_group_index = destination->second;
  // Fatal on mismatch: the indices are used to subscript `args` below.
  ASSERT_EQ(1, source_group_index);
  ASSERT_EQ(2, destination_group_index);

  // The args is zero-based.
  EXPECT_EQ("mountain view", args[source_group_index - 1]);
  EXPECT_EQ("san jose", args[destination_group_index - 1]);
}
589*ccdc9c3eSSadaf Ebrahimi 
// FullMatch without capture arguments: the pattern has to cover the
// entire text, from the first character to the last.
TEST(RE2, FullMatchWithNoArgs) {
  static const struct {
    const char* text;
    const char* pattern;
    bool matches;
  } kCases[] = {
    {"h", "h", true},
    {"hello", "hello", true},
    {"hello", "h.*o", true},
    {"othello", "h.*o", false},  // Must be anchored at front
    {"hello!", "h.*o", false},   // Must be anchored at end
  };
  for (const auto& c : kCases) {
    ASSERT_EQ(c.matches, RE2::FullMatch(c.text, c.pattern))
        << " text " << c.text << " pattern " << c.pattern;
  }
}
597*ccdc9c3eSSadaf Ebrahimi 
// PartialMatch succeeds when the pattern matches any substring of the text.
TEST(RE2, PartialMatch) {
  ASSERT_TRUE(RE2::PartialMatch("x", "x"));

  // The same pattern is found at the start, in the middle, and before a
  // trailing character.
  static const char kPattern[] = "h.*o";
  static const char* const kTexts[] = {"hello", "othello", "hello!"};
  for (const char* text : kTexts) {
    ASSERT_TRUE(RE2::PartialMatch(text, kPattern)) << " text " << text;
  }

  // Twenty nested capture groups, none of them extracted.
  ASSERT_TRUE(RE2::PartialMatch("x", "((((((((((((((((((((x))))))))))))))))))))"));
}
605*ccdc9c3eSSadaf Ebrahimi 
// Exercises PartialMatchN with zero, one and two extraction arguments.
TEST(RE2, PartialMatchN) {
  RE2::Arg slots[2];
  const RE2::Arg* const slot_ptrs[2] = { &slots[0], &slots[1] };

  // No arguments requested.
  EXPECT_TRUE(RE2::PartialMatchN("hello", "e.*o", slot_ptrs, 0));
  EXPECT_FALSE(RE2::PartialMatchN("othello", "a.*o", slot_ptrs, 0));

  // One argument: an int.
  int number;
  slots[0] = &number;
  EXPECT_TRUE(RE2::PartialMatchN("1001 nights", "(\\d+)", slot_ptrs, 1));
  EXPECT_EQ(1001, number);
  EXPECT_FALSE(RE2::PartialMatchN("three", "(\\d+)", slot_ptrs, 1));

  // Two arguments: an int followed by a string.
  string word;
  slots[1] = &word;
  EXPECT_TRUE(
      RE2::PartialMatchN("answer: 42:life", "(\\d+):(\\w+)", slot_ptrs, 2));
  EXPECT_EQ(42, number);
  EXPECT_EQ("life", word);
  EXPECT_FALSE(RE2::PartialMatchN("hi1", "(\\w+)(1)", slot_ptrs, 2));
}
629*ccdc9c3eSSadaf Ebrahimi 
// FullMatch called with no extraction arguments.
TEST(RE2, FullMatchZeroArg) {
  const bool matched = RE2::FullMatch("1001", "\\d+");
  ASSERT_TRUE(matched);
}
634*ccdc9c3eSSadaf Ebrahimi 
// FullMatch with a single int extraction argument.
TEST(RE2, FullMatchOneArg) {
  int value;

  // Positive and negative numbers are extracted into the int.
  ASSERT_TRUE(RE2::FullMatch("1001", "(\\d+)",   &value));
  ASSERT_EQ(value, 1001);
  ASSERT_TRUE(RE2::FullMatch("-123", "(-?\\d+)", &value));
  ASSERT_EQ(value, -123);

  // The capture group here is empty; the call must report failure.
  ASSERT_FALSE(RE2::FullMatch("10", "()\\d+", &value));

  // A 40-digit capture must not be accepted into an int.
  ASSERT_FALSE(
      RE2::FullMatch("1234567890123456789012345678901234567890", "(\\d+)",
                     &value));
}
647*ccdc9c3eSSadaf Ebrahimi 
// Integer extraction when the captured digits are surrounded by other
// digits: only the captured portion may be converted.
TEST(RE2, FullMatchIntegerArg) {
  int captured;

  // Whole-string matches.
  ASSERT_TRUE(RE2::FullMatch("1234", "1(\\d*)4", &captured));
  ASSERT_EQ(captured, 23);
  ASSERT_TRUE(RE2::FullMatch("1234", "(\\d)\\d+", &captured));
  ASSERT_EQ(captured, 1);
  ASSERT_TRUE(RE2::FullMatch("-1234", "(-\\d)\\d+", &captured));
  ASSERT_EQ(captured, -1);

  // Substring matches.
  ASSERT_TRUE(RE2::PartialMatch("1234", "(\\d)", &captured));
  ASSERT_EQ(captured, 1);
  ASSERT_TRUE(RE2::PartialMatch("-1234", "(-\\d)", &captured));
  ASSERT_EQ(captured, -1);
}
663*ccdc9c3eSSadaf Ebrahimi 
// FullMatch extracting the capture into a string.
TEST(RE2, FullMatchStringArg) {
  string captured;
  ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", &captured));
  ASSERT_EQ(captured, string("ell"));
}
670*ccdc9c3eSSadaf Ebrahimi 
// FullMatch extracting one capture into a StringPiece and another into
// an int.
TEST(RE2, FullMatchStringPieceArg) {
  StringPiece word;
  int number;
  ASSERT_TRUE(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &word, &number));

  // Compare length and bytes separately.
  ASSERT_EQ(word.size(), 4);
  ASSERT_TRUE(memcmp(word.data(), "ruby", 4) == 0);
  ASSERT_EQ(number, 1234);
}
680*ccdc9c3eSSadaf Ebrahimi 
// FullMatch with two extraction arguments of different types.
TEST(RE2, FullMatchMultiArg) {
  string word;
  int number;
  ASSERT_TRUE(RE2::FullMatch("ruby:1234", "(\\w+):(\\d+)", &word, &number));
  ASSERT_EQ(word, string("ruby"));
  ASSERT_EQ(number, 1234);
}
689*ccdc9c3eSSadaf Ebrahimi 
// Exercises FullMatchN with zero, one and two extraction arguments.
TEST(RE2, FullMatchN) {
  RE2::Arg slots[2];
  const RE2::Arg* const slot_ptrs[2] = { &slots[0], &slots[1] };

  // No arguments requested.
  EXPECT_TRUE(RE2::FullMatchN("hello", "h.*o", slot_ptrs, 0));
  EXPECT_FALSE(RE2::FullMatchN("othello", "h.*o", slot_ptrs, 0));

  // One argument: an int.
  int number;
  slots[0] = &number;
  EXPECT_TRUE(RE2::FullMatchN("1001", "(\\d+)", slot_ptrs, 1));
  EXPECT_EQ(1001, number);
  EXPECT_FALSE(RE2::FullMatchN("three", "(\\d+)", slot_ptrs, 1));

  // Two arguments: an int followed by a string.
  string word;
  slots[1] = &word;
  EXPECT_TRUE(RE2::FullMatchN("42:life", "(\\d+):(\\w+)", slot_ptrs, 2));
  EXPECT_EQ(42, number);
  EXPECT_EQ("life", word);
  EXPECT_FALSE(RE2::FullMatchN("hi1", "(\\w+)(1)", slot_ptrs, 2));
}
713*ccdc9c3eSSadaf Ebrahimi 
// A null extraction argument means "do not extract this group"; the
// groups on either side must still be extracted.
TEST(RE2, FullMatchIgnoredArg) {
  string word;
  int number;

  // Old-school NULL should be ignored.
  ASSERT_TRUE(RE2::FullMatch("ruby:1234", "(\\w+)(:)(\\d+)", &word,
                             static_cast<void*>(NULL), &number));
  ASSERT_EQ(word, string("ruby"));
  ASSERT_EQ(number, 1234);

  // C++11 nullptr should also be ignored.
  ASSERT_TRUE(
      RE2::FullMatch("rubz:1235", "(\\w+)(:)(\\d+)", &word, nullptr, &number));
  ASSERT_EQ(word, string("rubz"));
  ASSERT_EQ(number, 1235);
}
729*ccdc9c3eSSadaf Ebrahimi 
// Typed (non-void*) null arguments: nothing is stored, but the call still
// succeeds or fails according to whether the capture parses as that type.
TEST(RE2, FullMatchTypedNullArg) {
  string unused;

  // Ignore non-void* NULL arg
  ASSERT_TRUE(RE2::FullMatch("hello", "he(.*)lo", static_cast<char*>(NULL)));
  ASSERT_TRUE(RE2::FullMatch("hello", "h(.*)o", static_cast<string*>(NULL)));
  ASSERT_TRUE(
      RE2::FullMatch("hello", "h(.*)o", static_cast<StringPiece*>(NULL)));
  ASSERT_TRUE(RE2::FullMatch("1234", "(.*)", static_cast<int*>(NULL)));
  ASSERT_TRUE(RE2::FullMatch("1234567890123456", "(.*)",
                             static_cast<long long*>(NULL)));
  ASSERT_TRUE(RE2::FullMatch("123.4567890123456", "(.*)",
                             static_cast<double*>(NULL)));
  ASSERT_TRUE(RE2::FullMatch("123.4567890123456", "(.*)",
                             static_cast<float*>(NULL)));

  // Fail on non-void* NULL arg if the match doesn't parse for the given type.
  ASSERT_FALSE(
      RE2::FullMatch("hello", "h(.*)lo", &unused, static_cast<char*>(NULL)));
  ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", static_cast<int*>(NULL)));
  ASSERT_FALSE(
      RE2::FullMatch("1234567890123456", "(.*)", static_cast<int*>(NULL)));
  ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", static_cast<double*>(NULL)));
  ASSERT_FALSE(RE2::FullMatch("hello", "(.*)", static_cast<float*>(NULL)));
}
749*ccdc9c3eSSadaf Ebrahimi 
750*ccdc9c3eSSadaf Ebrahimi // Check that numeric parsing code does not read past the end of
751*ccdc9c3eSSadaf Ebrahimi // the number being parsed.
752*ccdc9c3eSSadaf Ebrahimi // This implementation requires mmap(2) et al. and thus cannot
753*ccdc9c3eSSadaf Ebrahimi // be used unless they are available.
TEST(RE2,NULTerminated)754*ccdc9c3eSSadaf Ebrahimi TEST(RE2, NULTerminated) {
755*ccdc9c3eSSadaf Ebrahimi #if defined(_POSIX_MAPPED_FILES) && _POSIX_MAPPED_FILES > 0
756*ccdc9c3eSSadaf Ebrahimi   char *v;
757*ccdc9c3eSSadaf Ebrahimi   int x;
758*ccdc9c3eSSadaf Ebrahimi   long pagesize = sysconf(_SC_PAGE_SIZE);
759*ccdc9c3eSSadaf Ebrahimi 
760*ccdc9c3eSSadaf Ebrahimi #ifndef MAP_ANONYMOUS
761*ccdc9c3eSSadaf Ebrahimi #define MAP_ANONYMOUS MAP_ANON
762*ccdc9c3eSSadaf Ebrahimi #endif
763*ccdc9c3eSSadaf Ebrahimi   v = static_cast<char*>(mmap(NULL, 2*pagesize, PROT_READ|PROT_WRITE,
764*ccdc9c3eSSadaf Ebrahimi                               MAP_ANONYMOUS|MAP_PRIVATE, -1, 0));
765*ccdc9c3eSSadaf Ebrahimi   ASSERT_TRUE(v != reinterpret_cast<char*>(-1));
766*ccdc9c3eSSadaf Ebrahimi   LOG(INFO) << "Memory at " << (void*)v;
767*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(munmap(v + pagesize, pagesize), 0) << " error " << errno;
768*ccdc9c3eSSadaf Ebrahimi   v[pagesize - 1] = '1';
769*ccdc9c3eSSadaf Ebrahimi 
770*ccdc9c3eSSadaf Ebrahimi   x = 0;
771*ccdc9c3eSSadaf Ebrahimi   ASSERT_TRUE(RE2::FullMatch(StringPiece(v + pagesize - 1, 1), "(.*)", &x));
772*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(x, 1);
773*ccdc9c3eSSadaf Ebrahimi #endif
774*ccdc9c3eSSadaf Ebrahimi }
775*ccdc9c3eSSadaf Ebrahimi 
TEST(RE2,FullMatchTypeTests)776*ccdc9c3eSSadaf Ebrahimi TEST(RE2, FullMatchTypeTests) {
777*ccdc9c3eSSadaf Ebrahimi   // Type tests
778*ccdc9c3eSSadaf Ebrahimi   string zeros(1000, '0');
779*ccdc9c3eSSadaf Ebrahimi   {
780*ccdc9c3eSSadaf Ebrahimi     char c;
781*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &c));
782*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(c, 'H');
783*ccdc9c3eSSadaf Ebrahimi   }
784*ccdc9c3eSSadaf Ebrahimi   {
785*ccdc9c3eSSadaf Ebrahimi     unsigned char c;
786*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("Hello", "(H)ello", &c));
787*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(c, static_cast<unsigned char>('H'));
788*ccdc9c3eSSadaf Ebrahimi   }
789*ccdc9c3eSSadaf Ebrahimi   {
790*ccdc9c3eSSadaf Ebrahimi     int16_t v;
791*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("100",     "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
792*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("-100",    "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
793*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("32767",   "(-?\\d+)", &v)); ASSERT_EQ(v, 32767);
794*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("-32768",  "(-?\\d+)", &v)); ASSERT_EQ(v, -32768);
795*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(RE2::FullMatch("-32769", "(-?\\d+)", &v));
796*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(RE2::FullMatch("32768",  "(-?\\d+)", &v));
797*ccdc9c3eSSadaf Ebrahimi   }
798*ccdc9c3eSSadaf Ebrahimi   {
799*ccdc9c3eSSadaf Ebrahimi     uint16_t v;
800*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("100",    "(\\d+)", &v)); ASSERT_EQ(v, 100);
801*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("32767",  "(\\d+)", &v)); ASSERT_EQ(v, 32767);
802*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("65535",  "(\\d+)", &v)); ASSERT_EQ(v, 65535);
803*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(RE2::FullMatch("65536", "(\\d+)", &v));
804*ccdc9c3eSSadaf Ebrahimi   }
805*ccdc9c3eSSadaf Ebrahimi   {
806*ccdc9c3eSSadaf Ebrahimi     int32_t v;
807*ccdc9c3eSSadaf Ebrahimi     static const int32_t max = INT32_C(0x7fffffff);
808*ccdc9c3eSSadaf Ebrahimi     static const int32_t min = -max - 1;
809*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("100",          "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
810*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("-100",         "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
811*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("2147483647",   "(-?\\d+)", &v)); ASSERT_EQ(v, max);
812*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("-2147483648",  "(-?\\d+)", &v)); ASSERT_EQ(v, min);
813*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(RE2::FullMatch("-2147483649", "(-?\\d+)", &v));
814*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(RE2::FullMatch("2147483648",  "(-?\\d+)", &v));
815*ccdc9c3eSSadaf Ebrahimi 
816*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch(zeros + "2147483647", "(-?\\d+)", &v));
817*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(v, max);
818*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("-" + zeros + "2147483648", "(-?\\d+)", &v));
819*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(v, min);
820*ccdc9c3eSSadaf Ebrahimi 
821*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(RE2::FullMatch("-" + zeros + "2147483649", "(-?\\d+)", &v));
822*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("0x7fffffff", "(.*)", RE2::CRadix(&v)));
823*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(v, max);
824*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(RE2::FullMatch("000x7fffffff", "(.*)", RE2::CRadix(&v)));
825*ccdc9c3eSSadaf Ebrahimi   }
826*ccdc9c3eSSadaf Ebrahimi   {
827*ccdc9c3eSSadaf Ebrahimi     uint32_t v;
828*ccdc9c3eSSadaf Ebrahimi     static const uint32_t max = UINT32_C(0xffffffff);
829*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("100",         "(\\d+)", &v)); ASSERT_EQ(v, 100);
830*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("4294967295",  "(\\d+)", &v)); ASSERT_EQ(v, max);
831*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(RE2::FullMatch("4294967296", "(\\d+)", &v));
832*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(RE2::FullMatch("-1",         "(\\d+)", &v));
833*ccdc9c3eSSadaf Ebrahimi 
834*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch(zeros + "4294967295", "(\\d+)", &v)); ASSERT_EQ(v, max);
835*ccdc9c3eSSadaf Ebrahimi   }
836*ccdc9c3eSSadaf Ebrahimi   {
837*ccdc9c3eSSadaf Ebrahimi     int64_t v;
838*ccdc9c3eSSadaf Ebrahimi     static const int64_t max = INT64_C(0x7fffffffffffffff);
839*ccdc9c3eSSadaf Ebrahimi     static const int64_t min = -max - 1;
840*ccdc9c3eSSadaf Ebrahimi     string str;
841*ccdc9c3eSSadaf Ebrahimi 
842*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("100",  "(-?\\d+)", &v)); ASSERT_EQ(v, 100);
843*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v)); ASSERT_EQ(v, -100);
844*ccdc9c3eSSadaf Ebrahimi 
845*ccdc9c3eSSadaf Ebrahimi     str = std::to_string(max);
846*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch(str,    "(-?\\d+)", &v)); ASSERT_EQ(v, max);
847*ccdc9c3eSSadaf Ebrahimi 
848*ccdc9c3eSSadaf Ebrahimi     str = std::to_string(min);
849*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch(str,    "(-?\\d+)", &v)); ASSERT_EQ(v, min);
850*ccdc9c3eSSadaf Ebrahimi 
851*ccdc9c3eSSadaf Ebrahimi     str = std::to_string(max);
852*ccdc9c3eSSadaf Ebrahimi     ASSERT_NE(str.back(), '9');
853*ccdc9c3eSSadaf Ebrahimi     str.back()++;
854*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(RE2::FullMatch(str,   "(-?\\d+)", &v));
855*ccdc9c3eSSadaf Ebrahimi 
856*ccdc9c3eSSadaf Ebrahimi     str = std::to_string(min);
857*ccdc9c3eSSadaf Ebrahimi     ASSERT_NE(str.back(), '9');
858*ccdc9c3eSSadaf Ebrahimi     str.back()++;
859*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(RE2::FullMatch(str,   "(-?\\d+)", &v));
860*ccdc9c3eSSadaf Ebrahimi   }
861*ccdc9c3eSSadaf Ebrahimi   {
862*ccdc9c3eSSadaf Ebrahimi     uint64_t v;
863*ccdc9c3eSSadaf Ebrahimi     int64_t v2;
864*ccdc9c3eSSadaf Ebrahimi     static const uint64_t max = UINT64_C(0xffffffffffffffff);
865*ccdc9c3eSSadaf Ebrahimi     string str;
866*ccdc9c3eSSadaf Ebrahimi 
867*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("100",  "(-?\\d+)", &v));  ASSERT_EQ(v, 100);
868*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("-100", "(-?\\d+)", &v2)); ASSERT_EQ(v2, -100);
869*ccdc9c3eSSadaf Ebrahimi 
870*ccdc9c3eSSadaf Ebrahimi     str = std::to_string(max);
871*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch(str,    "(-?\\d+)", &v)); ASSERT_EQ(v, max);
872*ccdc9c3eSSadaf Ebrahimi 
873*ccdc9c3eSSadaf Ebrahimi     ASSERT_NE(str.back(), '9');
874*ccdc9c3eSSadaf Ebrahimi     str.back()++;
875*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(RE2::FullMatch(str,   "(-?\\d+)", &v));
876*ccdc9c3eSSadaf Ebrahimi   }
877*ccdc9c3eSSadaf Ebrahimi }
878*ccdc9c3eSSadaf Ebrahimi 
TEST(RE2,FloatingPointFullMatchTypes)879*ccdc9c3eSSadaf Ebrahimi TEST(RE2, FloatingPointFullMatchTypes) {
880*ccdc9c3eSSadaf Ebrahimi   string zeros(1000, '0');
881*ccdc9c3eSSadaf Ebrahimi   {
882*ccdc9c3eSSadaf Ebrahimi     float v;
883*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("100",   "(.*)", &v)); ASSERT_EQ(v, 100);
884*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("-100.", "(.*)", &v)); ASSERT_EQ(v, -100);
885*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("1e23",  "(.*)", &v)); ASSERT_EQ(v, float(1e23));
886*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch(" 100",  "(.*)", &v)); ASSERT_EQ(v, 100);
887*ccdc9c3eSSadaf Ebrahimi 
888*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch(zeros + "1e23",  "(.*)", &v));
889*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(v, float(1e23));
890*ccdc9c3eSSadaf Ebrahimi 
891*ccdc9c3eSSadaf Ebrahimi     // 6700000000081920.1 is an edge case.
892*ccdc9c3eSSadaf Ebrahimi     // 6700000000081920 is exactly halfway between
893*ccdc9c3eSSadaf Ebrahimi     // two float32s, so the .1 should make it round up.
894*ccdc9c3eSSadaf Ebrahimi     // However, the .1 is outside the precision possible with
895*ccdc9c3eSSadaf Ebrahimi     // a float64: the nearest float64 is 6700000000081920.
896*ccdc9c3eSSadaf Ebrahimi     // So if the code uses strtod and then converts to float32,
897*ccdc9c3eSSadaf Ebrahimi     // round-to-even will make it round down instead of up.
898*ccdc9c3eSSadaf Ebrahimi     // To pass the test, the parser must call strtof directly.
899*ccdc9c3eSSadaf Ebrahimi     // This test case is carefully chosen to use only a 17-digit
900*ccdc9c3eSSadaf Ebrahimi     // number, since C does not guarantee to get the correctly
901*ccdc9c3eSSadaf Ebrahimi     // rounded answer for strtod and strtof unless the input is
902*ccdc9c3eSSadaf Ebrahimi     // short.
903*ccdc9c3eSSadaf Ebrahimi     //
904*ccdc9c3eSSadaf Ebrahimi     // This is known to fail on Cygwin and MinGW due to a broken
905*ccdc9c3eSSadaf Ebrahimi     // implementation of strtof(3). And apparently MSVC too. Sigh.
906*ccdc9c3eSSadaf Ebrahimi #if !defined(_MSC_VER) && !defined(__CYGWIN__) && !defined(__MINGW32__)
907*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("0.1", "(.*)", &v));
908*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(v, 0.1f) << StringPrintf("%.8g != %.8g", v, 0.1f);
909*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("6700000000081920.1", "(.*)", &v));
910*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(v, 6700000000081920.1f)
911*ccdc9c3eSSadaf Ebrahimi       << StringPrintf("%.8g != %.8g", v, 6700000000081920.1f);
912*ccdc9c3eSSadaf Ebrahimi #endif
913*ccdc9c3eSSadaf Ebrahimi   }
914*ccdc9c3eSSadaf Ebrahimi   {
915*ccdc9c3eSSadaf Ebrahimi     double v;
916*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("100",   "(.*)", &v)); ASSERT_EQ(v, 100);
917*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("-100.", "(.*)", &v)); ASSERT_EQ(v, -100);
918*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("1e23",  "(.*)", &v)); ASSERT_EQ(v, 1e23);
919*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch(zeros + "1e23", "(.*)", &v));
920*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(v, double(1e23));
921*ccdc9c3eSSadaf Ebrahimi 
922*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("0.1", "(.*)", &v));
923*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(v, 0.1) << StringPrintf("%.17g != %.17g", v, 0.1);
924*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::FullMatch("1.00000005960464485", "(.*)", &v));
925*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(v, 1.0000000596046448)
926*ccdc9c3eSSadaf Ebrahimi       << StringPrintf("%.17g != %.17g", v, 1.0000000596046448);
927*ccdc9c3eSSadaf Ebrahimi   }
928*ccdc9c3eSSadaf Ebrahimi }
929*ccdc9c3eSSadaf Ebrahimi 
// FullMatch is anchored at both ends: stray characters before or after
// the digits must be accounted for by the pattern.
TEST(RE2, FullMatchAnchored) {
  int number;

  // An extra character on either side defeats the match.
  ASSERT_FALSE(RE2::FullMatch("x1001", "(\\d+)",  &number));
  ASSERT_FALSE(RE2::FullMatch("1001x", "(\\d+)",  &number));

  // Once the pattern covers the extra character, the match succeeds.
  ASSERT_TRUE(RE2::FullMatch("x1001",  "x(\\d+)", &number));
  ASSERT_EQ(number, 1001);
  ASSERT_TRUE(RE2::FullMatch("1001x",  "(\\d+)x", &number));
  ASSERT_EQ(number, 1001);
}
938*ccdc9c3eSSadaf Ebrahimi 
// Brace repetition: {5,} needs at least five characters from the class.
TEST(RE2, FullMatchBraces) {
  static const char kPattern[] = "[0-9a-f+.-]{5,}";
  static const struct {
    const char* text;
    bool matches;
  } kCases[] = {
    {"0abcd", true},
    {"0abcde", true},
    {"0abc", false},
  };
  for (const auto& c : kCases) {
    ASSERT_EQ(c.matches, RE2::FullMatch(c.text, kPattern))
        << " text " << c.text;
  }
}
945*ccdc9c3eSSadaf Ebrahimi 
// A pattern mixing literal alternatives with a character class.
TEST(RE2, Complicated) {
  static const char kPattern[] = "foo|bar|[A-Z]";
  static const struct {
    const char* text;
    bool matches;
  } kCases[] = {
    {"foo", true},
    {"bar", true},
    {"X", true},
    {"XY", false},
  };
  for (const auto& c : kCases) {
    ASSERT_EQ(c.matches, RE2::FullMatch(c.text, kPattern))
        << " text " << c.text;
  }
}
953*ccdc9c3eSSadaf Ebrahimi 
// Check full-match handling at the end of the text (needs '$' tacked on
// internally), with and without an explicit '$' in the pattern.
TEST(RE2, FullMatchEnd) {
  static const struct {
    const char* text;
    const char* pattern;
    bool matches;
  } kCases[] = {
    {"fo", "fo|foo", true},
    {"foo", "fo|foo", true},
    {"fo", "fo|foo$", true},
    {"foo", "fo|foo$", true},
    {"foo", "foo$", true},
    {"foo$bar", "foo\\$", false},
    {"fox", "fo|bar", false},
  };
  for (const auto& c : kCases) {
    ASSERT_EQ(c.matches, RE2::FullMatch(c.text, c.pattern))
        << " text " << c.text << " pattern " << c.pattern;
  }

  // Disabled on purpose.  Enable if the handling of '$' is ever changed
  // so that it no longer matches a trailing newline.
  if (false) {
    // Guards against pcre's special handling of a '\n' at the end of the
    // string matching '$'.
    ASSERT_FALSE(RE2::PartialMatch("foo\n", "foo$"));
  }
}
972*ccdc9c3eSSadaf Ebrahimi 
TEST(RE2,FullMatchArgCount)973*ccdc9c3eSSadaf Ebrahimi TEST(RE2, FullMatchArgCount) {
974*ccdc9c3eSSadaf Ebrahimi   // Number of args
975*ccdc9c3eSSadaf Ebrahimi   int a[16];
976*ccdc9c3eSSadaf Ebrahimi   ASSERT_TRUE(RE2::FullMatch("", ""));
977*ccdc9c3eSSadaf Ebrahimi 
978*ccdc9c3eSSadaf Ebrahimi   memset(a, 0, sizeof(0));
979*ccdc9c3eSSadaf Ebrahimi   ASSERT_TRUE(RE2::FullMatch("1", "(\\d){1}", &a[0]));
980*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[0], 1);
981*ccdc9c3eSSadaf Ebrahimi 
982*ccdc9c3eSSadaf Ebrahimi   memset(a, 0, sizeof(0));
983*ccdc9c3eSSadaf Ebrahimi   ASSERT_TRUE(RE2::FullMatch("12", "(\\d)(\\d)", &a[0], &a[1]));
984*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[0], 1);
985*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[1], 2);
986*ccdc9c3eSSadaf Ebrahimi 
987*ccdc9c3eSSadaf Ebrahimi   memset(a, 0, sizeof(0));
988*ccdc9c3eSSadaf Ebrahimi   ASSERT_TRUE(RE2::FullMatch("123", "(\\d)(\\d)(\\d)", &a[0], &a[1], &a[2]));
989*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[0], 1);
990*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[1], 2);
991*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[2], 3);
992*ccdc9c3eSSadaf Ebrahimi 
993*ccdc9c3eSSadaf Ebrahimi   memset(a, 0, sizeof(0));
994*ccdc9c3eSSadaf Ebrahimi   ASSERT_TRUE(RE2::FullMatch("1234", "(\\d)(\\d)(\\d)(\\d)", &a[0], &a[1],
995*ccdc9c3eSSadaf Ebrahimi                              &a[2], &a[3]));
996*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[0], 1);
997*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[1], 2);
998*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[2], 3);
999*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[3], 4);
1000*ccdc9c3eSSadaf Ebrahimi 
1001*ccdc9c3eSSadaf Ebrahimi   memset(a, 0, sizeof(0));
1002*ccdc9c3eSSadaf Ebrahimi   ASSERT_TRUE(RE2::FullMatch("12345", "(\\d)(\\d)(\\d)(\\d)(\\d)", &a[0], &a[1],
1003*ccdc9c3eSSadaf Ebrahimi                              &a[2], &a[3], &a[4]));
1004*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[0], 1);
1005*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[1], 2);
1006*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[2], 3);
1007*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[3], 4);
1008*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[4], 5);
1009*ccdc9c3eSSadaf Ebrahimi 
1010*ccdc9c3eSSadaf Ebrahimi   memset(a, 0, sizeof(0));
1011*ccdc9c3eSSadaf Ebrahimi   ASSERT_TRUE(RE2::FullMatch("123456", "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)", &a[0],
1012*ccdc9c3eSSadaf Ebrahimi                              &a[1], &a[2], &a[3], &a[4], &a[5]));
1013*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[0], 1);
1014*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[1], 2);
1015*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[2], 3);
1016*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[3], 4);
1017*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[4], 5);
1018*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[5], 6);
1019*ccdc9c3eSSadaf Ebrahimi 
1020*ccdc9c3eSSadaf Ebrahimi   memset(a, 0, sizeof(0));
1021*ccdc9c3eSSadaf Ebrahimi   ASSERT_TRUE(RE2::FullMatch("1234567", "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
1022*ccdc9c3eSSadaf Ebrahimi                              &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6]));
1023*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[0], 1);
1024*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[1], 2);
1025*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[2], 3);
1026*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[3], 4);
1027*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[4], 5);
1028*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[5], 6);
1029*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[6], 7);
1030*ccdc9c3eSSadaf Ebrahimi 
1031*ccdc9c3eSSadaf Ebrahimi   memset(a, 0, sizeof(0));
1032*ccdc9c3eSSadaf Ebrahimi   ASSERT_TRUE(RE2::FullMatch("1234567890123456",
1033*ccdc9c3eSSadaf Ebrahimi                              "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)"
1034*ccdc9c3eSSadaf Ebrahimi                              "(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)(\\d)",
1035*ccdc9c3eSSadaf Ebrahimi                              &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6],
1036*ccdc9c3eSSadaf Ebrahimi                              &a[7], &a[8], &a[9], &a[10], &a[11], &a[12],
1037*ccdc9c3eSSadaf Ebrahimi                              &a[13], &a[14], &a[15]));
1038*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[0], 1);
1039*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[1], 2);
1040*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[2], 3);
1041*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[3], 4);
1042*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[4], 5);
1043*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[5], 6);
1044*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[6], 7);
1045*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[7], 8);
1046*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[8], 9);
1047*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[9], 0);
1048*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[10], 1);
1049*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[11], 2);
1050*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[12], 3);
1051*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[13], 4);
1052*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[14], 5);
1053*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(a[15], 6);
1054*ccdc9c3eSSadaf Ebrahimi }
1055*ccdc9c3eSSadaf Ebrahimi 
// Exercises the read-only accessors of RE2: pattern() must hand back the
// string the object was constructed from, and for the pattern "foo" the test
// expects an empty error(), ok() == true and error_code() == RE2::NoError.
TEST(RE2,Accessors)1056*ccdc9c3eSSadaf Ebrahimi TEST(RE2, Accessors) {
1057*ccdc9c3eSSadaf Ebrahimi   // Check the pattern() accessor
1058*ccdc9c3eSSadaf Ebrahimi   {
1059*ccdc9c3eSSadaf Ebrahimi     const string kPattern = "http://([^/]+)/.*";
1060*ccdc9c3eSSadaf Ebrahimi     const RE2 re(kPattern);
1061*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(kPattern, re.pattern());
1062*ccdc9c3eSSadaf Ebrahimi   }
1063*ccdc9c3eSSadaf Ebrahimi 
1064*ccdc9c3eSSadaf Ebrahimi   // Check RE2 error field.
1065*ccdc9c3eSSadaf Ebrahimi   {
1066*ccdc9c3eSSadaf Ebrahimi     RE2 re("foo");
1067*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(re.error().empty());  // Must have no error
1068*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(re.ok());
1069*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(re.error_code(), RE2::NoError);
1070*ccdc9c3eSSadaf Ebrahimi   }
1071*ccdc9c3eSSadaf Ebrahimi }
1072*ccdc9c3eSSadaf Ebrahimi 
// Compares how RE2::Latin1 mode and the default mode treat the same
// nine-byte, three-character UTF-8 string.
TEST(RE2,UTF8)1073*ccdc9c3eSSadaf Ebrahimi TEST(RE2, UTF8) {
1074*ccdc9c3eSSadaf Ebrahimi   // Check UTF-8 handling
1075*ccdc9c3eSSadaf Ebrahimi   // Three Japanese characters (nihongo)
  // The trailing comment on each row is the code point (hex) that the three
  // bytes encode.
1076*ccdc9c3eSSadaf Ebrahimi   const char utf8_string[] = {
1077*ccdc9c3eSSadaf Ebrahimi        (char)0xe6, (char)0x97, (char)0xa5, // 65e5
1078*ccdc9c3eSSadaf Ebrahimi        (char)0xe6, (char)0x9c, (char)0xac, // 672c
1079*ccdc9c3eSSadaf Ebrahimi        (char)0xe8, (char)0xaa, (char)0x9e, // 8a9e
1080*ccdc9c3eSSadaf Ebrahimi        0
1081*ccdc9c3eSSadaf Ebrahimi   };
  // '.', then the middle character of utf8_string, then '.'.
1082*ccdc9c3eSSadaf Ebrahimi   const char utf8_pattern[] = {
1083*ccdc9c3eSSadaf Ebrahimi        '.',
1084*ccdc9c3eSSadaf Ebrahimi        (char)0xe6, (char)0x9c, (char)0xac, // 672c
1085*ccdc9c3eSSadaf Ebrahimi        '.',
1086*ccdc9c3eSSadaf Ebrahimi        0
1087*ccdc9c3eSSadaf Ebrahimi   };
1088*ccdc9c3eSSadaf Ebrahimi 
1089*ccdc9c3eSSadaf Ebrahimi   // Both should match in either mode, bytes or UTF-8
  // (nine '.' for the nine bytes in Latin-1 mode, three '.' for the three
  // characters in the default mode).
1090*ccdc9c3eSSadaf Ebrahimi   RE2 re_test1(".........", RE2::Latin1);
1091*ccdc9c3eSSadaf Ebrahimi   ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test1));
1092*ccdc9c3eSSadaf Ebrahimi   RE2 re_test2("...");
1093*ccdc9c3eSSadaf Ebrahimi   ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test2));
1094*ccdc9c3eSSadaf Ebrahimi 
1095*ccdc9c3eSSadaf Ebrahimi   // Check that '.' matches one byte or UTF-8 character
1096*ccdc9c3eSSadaf Ebrahimi   // according to the mode.
1097*ccdc9c3eSSadaf Ebrahimi   string s;
1098*ccdc9c3eSSadaf Ebrahimi   RE2 re_test3("(.)", RE2::Latin1);
1099*ccdc9c3eSSadaf Ebrahimi   ASSERT_TRUE(RE2::PartialMatch(utf8_string, re_test3, &s));
1100*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(s, string("\xe6"));
1101*ccdc9c3eSSadaf Ebrahimi   RE2 re_test4("(.)");
1102*ccdc9c3eSSadaf Ebrahimi   ASSERT_TRUE(RE2::PartialMatch(utf8_string, re_test4, &s));
1103*ccdc9c3eSSadaf Ebrahimi   ASSERT_EQ(s, string("\xe6\x97\xa5"));
1104*ccdc9c3eSSadaf Ebrahimi 
1105*ccdc9c3eSSadaf Ebrahimi   // Check that string matches itself in either mode
1106*ccdc9c3eSSadaf Ebrahimi   RE2 re_test5(utf8_string, RE2::Latin1);
1107*ccdc9c3eSSadaf Ebrahimi   ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test5));
1108*ccdc9c3eSSadaf Ebrahimi   RE2 re_test6(utf8_string);
1109*ccdc9c3eSSadaf Ebrahimi   ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test6));
1110*ccdc9c3eSSadaf Ebrahimi 
1111*ccdc9c3eSSadaf Ebrahimi   // Check that pattern matches string only in UTF8 mode
  // (in Latin-1 mode each '.' stands for a single byte, so the five-byte
  // pattern cannot cover the nine-byte string).
1112*ccdc9c3eSSadaf Ebrahimi   RE2 re_test7(utf8_pattern, RE2::Latin1);
1113*ccdc9c3eSSadaf Ebrahimi   ASSERT_FALSE(RE2::FullMatch(utf8_string, re_test7));
1114*ccdc9c3eSSadaf Ebrahimi   RE2 re_test8(utf8_pattern);
1115*ccdc9c3eSSadaf Ebrahimi   ASSERT_TRUE(RE2::FullMatch(utf8_string, re_test8));
1116*ccdc9c3eSSadaf Ebrahimi }
1117*ccdc9c3eSSadaf Ebrahimi 
// Each sub-case compiles the same pattern twice, once with RE2::Latin1 and
// once with default options, and expects FullMatch against "a aX" to fail
// for both.
TEST(RE2,UngreedyUTF8)1118*ccdc9c3eSSadaf Ebrahimi TEST(RE2, UngreedyUTF8) {
1119*ccdc9c3eSSadaf Ebrahimi   // Check that ungreedy, UTF8 regular expressions don't match when they
1120*ccdc9c3eSSadaf Ebrahimi   // oughtn't -- see bug 82246.
1121*ccdc9c3eSSadaf Ebrahimi   {
1122*ccdc9c3eSSadaf Ebrahimi     // This code always worked.
1123*ccdc9c3eSSadaf Ebrahimi     const char* pattern = "\\w+X";
1124*ccdc9c3eSSadaf Ebrahimi     const string target = "a aX";
1125*ccdc9c3eSSadaf Ebrahimi     RE2 match_sentence(pattern, RE2::Latin1);
1126*ccdc9c3eSSadaf Ebrahimi     RE2 match_sentence_re(pattern);
1127*ccdc9c3eSSadaf Ebrahimi 
1128*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(RE2::FullMatch(target, match_sentence));
1129*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(RE2::FullMatch(target, match_sentence_re));
1130*ccdc9c3eSSadaf Ebrahimi   }
1131*ccdc9c3eSSadaf Ebrahimi   {
    // Same pattern prefixed with the (?U) flag -- the "ungreedy" variant this
    // test is named for. The pattern is also checked to compile without an
    // error message.
1132*ccdc9c3eSSadaf Ebrahimi     const char* pattern = "(?U)\\w+X";
1133*ccdc9c3eSSadaf Ebrahimi     const string target = "a aX";
1134*ccdc9c3eSSadaf Ebrahimi     RE2 match_sentence(pattern, RE2::Latin1);
1135*ccdc9c3eSSadaf Ebrahimi     ASSERT_EQ(match_sentence.error(), "");
1136*ccdc9c3eSSadaf Ebrahimi     RE2 match_sentence_re(pattern);
1137*ccdc9c3eSSadaf Ebrahimi 
1138*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(RE2::FullMatch(target, match_sentence));
1139*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(RE2::FullMatch(target, match_sentence_re));
1140*ccdc9c3eSSadaf Ebrahimi   }
1141*ccdc9c3eSSadaf Ebrahimi }
1142*ccdc9c3eSSadaf Ebrahimi 
// Every pattern below must fail to compile, i.e. ok() must be false.
// In order, the patterns contain: a backreference-style escape (\1), an
// unterminated character class, a reversed range (z-a), an unknown named
// class ([:foobar:]), an unclosed parenthesis, and a lone trailing backslash.
// NOTE(review): RE2::Quiet presumably silences error logging for these
// expected failures -- confirm against re2.h.
TEST(RE2,Rejects)1143*ccdc9c3eSSadaf Ebrahimi TEST(RE2, Rejects) {
1144*ccdc9c3eSSadaf Ebrahimi   {
1145*ccdc9c3eSSadaf Ebrahimi     RE2 re("a\\1", RE2::Quiet);
1146*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(re.ok()); }
1147*ccdc9c3eSSadaf Ebrahimi   {
1148*ccdc9c3eSSadaf Ebrahimi     RE2 re("a[x", RE2::Quiet);
1149*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(re.ok());
1150*ccdc9c3eSSadaf Ebrahimi   }
1151*ccdc9c3eSSadaf Ebrahimi   {
1152*ccdc9c3eSSadaf Ebrahimi     RE2 re("a[z-a]", RE2::Quiet);
1153*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(re.ok());
1154*ccdc9c3eSSadaf Ebrahimi   }
1155*ccdc9c3eSSadaf Ebrahimi   {
1156*ccdc9c3eSSadaf Ebrahimi     RE2 re("a[[:foobar:]]", RE2::Quiet);
1157*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(re.ok());
1158*ccdc9c3eSSadaf Ebrahimi   }
1159*ccdc9c3eSSadaf Ebrahimi   {
1160*ccdc9c3eSSadaf Ebrahimi     RE2 re("a(b", RE2::Quiet);
1161*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(re.ok());
1162*ccdc9c3eSSadaf Ebrahimi   }
1163*ccdc9c3eSSadaf Ebrahimi   {
1164*ccdc9c3eSSadaf Ebrahimi     RE2 re("a\\", RE2::Quiet);
1165*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(re.ok());
1166*ccdc9c3eSSadaf Ebrahimi   }
1167*ccdc9c3eSSadaf Ebrahimi }
1168*ccdc9c3eSSadaf Ebrahimi 
// Matching against an RE2 object that failed to compile must simply report
// "no match", and a large but acceptable counted repetition must still
// compile and match.
TEST(RE2,NoCrash)1169*ccdc9c3eSSadaf Ebrahimi TEST(RE2, NoCrash) {
1170*ccdc9c3eSSadaf Ebrahimi   // Test that using a bad regexp doesn't crash.
1171*ccdc9c3eSSadaf Ebrahimi   {
1172*ccdc9c3eSSadaf Ebrahimi     RE2 re("a\\", RE2::Quiet);
1173*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(re.ok());
1174*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(RE2::PartialMatch("a\\b", re));
1175*ccdc9c3eSSadaf Ebrahimi   }
1176*ccdc9c3eSSadaf Ebrahimi 
1177*ccdc9c3eSSadaf Ebrahimi   // Test that using an enormous regexp doesn't crash
  // (four nested {100} repetitions; the test expects compilation to fail).
1178*ccdc9c3eSSadaf Ebrahimi   {
1179*ccdc9c3eSSadaf Ebrahimi     RE2 re("(((.{100}){100}){100}){100}", RE2::Quiet);
1180*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(re.ok());
1181*ccdc9c3eSSadaf Ebrahimi     ASSERT_FALSE(RE2::PartialMatch("aaa", re));
1182*ccdc9c3eSSadaf Ebrahimi   }
1183*ccdc9c3eSSadaf Ebrahimi 
1184*ccdc9c3eSSadaf Ebrahimi   // Test that a crazy regexp still compiles and runs.
1185*ccdc9c3eSSadaf Ebrahimi   {
1186*ccdc9c3eSSadaf Ebrahimi     RE2 re(".{512}x", RE2::Quiet);
1187*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(re.ok());
    // Subject text: 515 'c' characters followed by a single 'x', so at least
    // 512 characters precede the 'x'.
1188*ccdc9c3eSSadaf Ebrahimi     string s;
1189*ccdc9c3eSSadaf Ebrahimi     s.append(515, 'c');
1190*ccdc9c3eSSadaf Ebrahimi     s.append("x");
1191*ccdc9c3eSSadaf Ebrahimi     ASSERT_TRUE(RE2::PartialMatch(s, re));
1192*ccdc9c3eSSadaf Ebrahimi   }
1193*ccdc9c3eSSadaf Ebrahimi }
1194*ccdc9c3eSSadaf Ebrahimi 
// Calls the TestRecursion helper once per pattern with a size of 15 KiB
// (15 * 1024 = 15360).
// NOTE(review): TestRecursion is defined outside this part of the file; what
// it does with the size and the pattern should be confirmed there.
TEST(RE2,Recursion)1195*ccdc9c3eSSadaf Ebrahimi TEST(RE2, Recursion) {
1196*ccdc9c3eSSadaf Ebrahimi   // Test that recursion is stopped.
1197*ccdc9c3eSSadaf Ebrahimi   // This test is PCRE-legacy -- there's no recursion in RE2.
1198*ccdc9c3eSSadaf Ebrahimi   int bytes = 15 * 1024;  // enough to crash PCRE
1199*ccdc9c3eSSadaf Ebrahimi   TestRecursion(bytes, ".");
1200*ccdc9c3eSSadaf Ebrahimi   TestRecursion(bytes, "a");
1201*ccdc9c3eSSadaf Ebrahimi   TestRecursion(bytes, "a.");
1202*ccdc9c3eSSadaf Ebrahimi   TestRecursion(bytes, "ab.");
1203*ccdc9c3eSSadaf Ebrahimi   TestRecursion(bytes, "abc.");
1204*ccdc9c3eSSadaf Ebrahimi }
1205*ccdc9c3eSSadaf Ebrahimi 
// Compiles ".{512}x" with an explicitly raised memory limit and checks that
// it matches a text of 515 'c' characters followed by 'x'.
TEST(RE2,BigCountedRepetition)1206*ccdc9c3eSSadaf Ebrahimi TEST(RE2, BigCountedRepetition) {
1207*ccdc9c3eSSadaf Ebrahimi   // Test that counted repetition works, given tons of memory.
1208*ccdc9c3eSSadaf Ebrahimi   RE2::Options opt;
  // 256<<20 == 268435456; presumably a byte count (256 MiB) -- TODO confirm
  // the unit of set_max_mem in re2.h.
1209*ccdc9c3eSSadaf Ebrahimi   opt.set_max_mem(256<<20);
1210*ccdc9c3eSSadaf Ebrahimi 
1211*ccdc9c3eSSadaf Ebrahimi   RE2 re(".{512}x", opt);
1212*ccdc9c3eSSadaf Ebrahimi   ASSERT_TRUE(re.ok());
1213*ccdc9c3eSSadaf Ebrahimi   string s;
1214*ccdc9c3eSSadaf Ebrahimi   s.append(515, 'c');
1215*ccdc9c3eSSadaf Ebrahimi   s.append("x");
1216*ccdc9c3eSSadaf Ebrahimi   ASSERT_TRUE(RE2::PartialMatch(s, re));
1217*ccdc9c3eSSadaf Ebrahimi }
1218*ccdc9c3eSSadaf Ebrahimi 
// Builds a 131076-character "comment" (the opener "x*", 131072 'a'
// characters, the closer "*x") and requires the comment-matching pattern to
// fully match it.
TEST(RE2,DeepRecursion)1219*ccdc9c3eSSadaf Ebrahimi TEST(RE2, DeepRecursion) {
1220*ccdc9c3eSSadaf Ebrahimi   // Test for deep stack recursion.  This would fail with a
1221*ccdc9c3eSSadaf Ebrahimi   // segmentation violation due to stack overflow before pcre was
1222*ccdc9c3eSSadaf Ebrahimi   // patched.
1223*ccdc9c3eSSadaf Ebrahimi   // Again, a PCRE legacy test.  RE2 doesn't recurse.
1224*ccdc9c3eSSadaf Ebrahimi   string comment("x*");
1225*ccdc9c3eSSadaf Ebrahimi   string a(131072, 'a');
1226*ccdc9c3eSSadaf Ebrahimi   comment += a;
1227*ccdc9c3eSSadaf Ebrahimi   comment += "*x";
  // The pattern repeats any of three alternatives: a whitespace character, an
  // "xx" line running to a newline, or an x* ... *x block.
1228*ccdc9c3eSSadaf Ebrahimi   RE2 re("((?:\\s|xx.*\n|x[*](?:\n|.)*?[*]x)*)");
1229*ccdc9c3eSSadaf Ebrahimi   ASSERT_TRUE(RE2::FullMatch(comment, re));
1230*ccdc9c3eSSadaf Ebrahimi }
1231*ccdc9c3eSSadaf Ebrahimi 
1232*ccdc9c3eSSadaf Ebrahimi // Suggested by Josh Hyman.  Failed when SearchOnePass was
1233*ccdc9c3eSSadaf Ebrahimi // not implementing case-folding.
// The same case-insensitive pattern is applied to the same text twice: once
// through PartialMatch and once through FindAndConsume (which is handed a
// pointer to the StringPiece); both calls must succeed.
TEST(CaseInsensitive,MatchAndConsume)1234*ccdc9c3eSSadaf Ebrahimi TEST(CaseInsensitive, MatchAndConsume) {
1235*ccdc9c3eSSadaf Ebrahimi   string result;
1236*ccdc9c3eSSadaf Ebrahimi   string text = "A fish named *Wanda*";
1237*ccdc9c3eSSadaf Ebrahimi   StringPiece sp(text);
1238*ccdc9c3eSSadaf Ebrahimi 
1239*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::PartialMatch(sp, "(?i)([wand]{5})", &result));
1240*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::FindAndConsume(&sp, "(?i)([wand]{5})", &result));
1241*ccdc9c3eSSadaf Ebrahimi }
1242*ccdc9c3eSSadaf Ebrahimi 
1243*ccdc9c3eSSadaf Ebrahimi // RE2 should permit implicit conversions from string, StringPiece, const char*,
1244*ccdc9c3eSSadaf Ebrahimi // and C string literals.
// Each call below passes the pattern "." as a different one of those four
// types in the regexp argument position of PartialMatch.
TEST(RE2,ImplicitConversions)1245*ccdc9c3eSSadaf Ebrahimi TEST(RE2, ImplicitConversions) {
1246*ccdc9c3eSSadaf Ebrahimi   string re_string(".");
1247*ccdc9c3eSSadaf Ebrahimi   StringPiece re_stringpiece(".");
1248*ccdc9c3eSSadaf Ebrahimi   const char* re_cstring = ".";
1249*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::PartialMatch("e", re_string));
1250*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::PartialMatch("e", re_stringpiece));
1251*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::PartialMatch("e", re_cstring));
1252*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::PartialMatch("e", "."));
1253*ccdc9c3eSSadaf Ebrahimi }
1254*ccdc9c3eSSadaf Ebrahimi 
1255*ccdc9c3eSSadaf Ebrahimi // Bugs introduced by 8622304
// Both reports are kept as regression checks. Every call is wrapped in
// EXPECT_TRUE, so the inline "ok" / "fails" remarks evidently record the
// behaviour seen when the bug was reported, not the expected outcome today.
TEST(RE2,CL8622304)1256*ccdc9c3eSSadaf Ebrahimi TEST(RE2, CL8622304) {
1257*ccdc9c3eSSadaf Ebrahimi   // reported by ingow
1258*ccdc9c3eSSadaf Ebrahimi   string dir;
1259*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])"));  // ok
1260*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::FullMatch("D", "([^\\\\])", &dir));  // fails
1261*ccdc9c3eSSadaf Ebrahimi 
1262*ccdc9c3eSSadaf Ebrahimi   // reported by jacobsa
  // Two captures are requested: the leading word, then the optional value
  // after ':' (running up to the first unescaped ';').
1263*ccdc9c3eSSadaf Ebrahimi   string key, val;
1264*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::PartialMatch("bar:1,0x2F,030,4,5;baz:true;fooby:false,true",
1265*ccdc9c3eSSadaf Ebrahimi               "(\\w+)(?::((?:[^;\\\\]|\\\\.)*))?;?",
1266*ccdc9c3eSSadaf Ebrahimi               &key,
1267*ccdc9c3eSSadaf Ebrahimi               &val));
1268*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ(key, "bar");
1269*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ(val, "1,0x2F,030,4,5");
1270*ccdc9c3eSSadaf Ebrahimi }
1271*ccdc9c3eSSadaf Ebrahimi 
1272*ccdc9c3eSSadaf Ebrahimi 
1273*ccdc9c3eSSadaf Ebrahimi // Check that RE2 returns correct regexp pieces on error.
1274*ccdc9c3eSSadaf Ebrahimi // In particular, make sure it returns whole runes
1275*ccdc9c3eSSadaf Ebrahimi // and that it always reports invalid UTF-8.
1276*ccdc9c3eSSadaf Ebrahimi // Also check that Perl error flag piece is big enough.
// Table consumed by TEST(RE2, ErrorArgs): each row pairs a pattern that must
// fail to compile with the exact error_arg() string expected for it.
1277*ccdc9c3eSSadaf Ebrahimi static struct ErrorTest {
1278*ccdc9c3eSSadaf Ebrahimi   const char *regexp;  // pattern that is expected to be rejected
1279*ccdc9c3eSSadaf Ebrahimi   const char *error;   // expected value of RE2::error_arg()
1280*ccdc9c3eSSadaf Ebrahimi } error_tests[] = {
1281*ccdc9c3eSSadaf Ebrahimi   { "ab\\αcd", "\\α" },
1282*ccdc9c3eSSadaf Ebrahimi   { "ef\\x☺01", "\\x☺0" },
1283*ccdc9c3eSSadaf Ebrahimi   { "gh\\x1☺01", "\\x1☺" },
1284*ccdc9c3eSSadaf Ebrahimi   { "ij\\x1", "\\x1" },
1285*ccdc9c3eSSadaf Ebrahimi   { "kl\\x", "\\x" },
1286*ccdc9c3eSSadaf Ebrahimi   { "uv\\x{0000☺}", "\\x{0000☺" },
1287*ccdc9c3eSSadaf Ebrahimi   { "wx\\p{ABC", "\\p{ABC" },
1288*ccdc9c3eSSadaf Ebrahimi   { "yz(?smiUX:abc)", "(?smiUX" },   // used to return (?s but the error is X
1289*ccdc9c3eSSadaf Ebrahimi   { "aa(?sm☺i", "(?sm☺" },
1290*ccdc9c3eSSadaf Ebrahimi   { "bb[abc", "[abc" },
1291*ccdc9c3eSSadaf Ebrahimi 
  // The rows below embed the byte \377 (0xFF) in the pattern.
1292*ccdc9c3eSSadaf Ebrahimi   { "mn\\x1\377", "" },  // no argument string returned for invalid UTF-8
1293*ccdc9c3eSSadaf Ebrahimi   { "op\377qr", "" },
1294*ccdc9c3eSSadaf Ebrahimi   { "st\\x{00000\377", "" },
1295*ccdc9c3eSSadaf Ebrahimi   { "zz\\p{\377}", "" },
1296*ccdc9c3eSSadaf Ebrahimi   { "zz\\x{00\377}", "" },
1297*ccdc9c3eSSadaf Ebrahimi   { "zz(?P<name\377>abc)", "" },
1298*ccdc9c3eSSadaf Ebrahimi };
// Compiles every error_tests pattern with RE2::Quiet and expects both that
// compilation fails and that error_arg() equals the row's `error` string.
// On a mismatch the full error() text is streamed into the failure message.
TEST(RE2,ErrorArgs)1299*ccdc9c3eSSadaf Ebrahimi TEST(RE2, ErrorArgs) {
1300*ccdc9c3eSSadaf Ebrahimi   for (int i = 0; i < arraysize(error_tests); i++) {
1301*ccdc9c3eSSadaf Ebrahimi     RE2 re(error_tests[i].regexp, RE2::Quiet);
1302*ccdc9c3eSSadaf Ebrahimi     EXPECT_FALSE(re.ok());
1303*ccdc9c3eSSadaf Ebrahimi     EXPECT_EQ(re.error_arg(), error_tests[i].error) << re.error();
1304*ccdc9c3eSSadaf Ebrahimi   }
1305*ccdc9c3eSSadaf Ebrahimi }
1306*ccdc9c3eSSadaf Ebrahimi 
1307*ccdc9c3eSSadaf Ebrahimi // Check that "never match \n" mode never matches \n.
// Table consumed by TEST(RE2, NeverNewline), which compiles each regexp with
// never_nl enabled and runs a partial match against `text`.
1308*ccdc9c3eSSadaf Ebrahimi static struct NeverTest {
1309*ccdc9c3eSSadaf Ebrahimi   const char* regexp;  // pattern to compile
1310*ccdc9c3eSSadaf Ebrahimi   const char* text;    // text to search
1311*ccdc9c3eSSadaf Ebrahimi   const char* match;   // expected capture, or NULL when no match is expected
1312*ccdc9c3eSSadaf Ebrahimi } never_tests[] = {
1313*ccdc9c3eSSadaf Ebrahimi   { "(.*)", "abc\ndef\nghi\n", "abc" },
1314*ccdc9c3eSSadaf Ebrahimi   { "(?s)(abc.*def)", "abc\ndef\n", NULL },
1315*ccdc9c3eSSadaf Ebrahimi   { "(abc(.|\n)*def)", "abc\ndef\n", NULL },
1316*ccdc9c3eSSadaf Ebrahimi   { "(abc[^x]*def)", "abc\ndef\n", NULL },
1317*ccdc9c3eSSadaf Ebrahimi   { "(abc[^x]*def)", "abczzzdef\ndef\n", "abczzzdef" },
1318*ccdc9c3eSSadaf Ebrahimi };
// Runs every never_tests row through a regexp compiled with never_nl set.
// Rows whose `match` is NULL must not match at all; for the remaining rows
// the partial match must succeed and the captured StringPiece must equal
// `match`.
TEST(RE2,NeverNewline)1319*ccdc9c3eSSadaf Ebrahimi TEST(RE2, NeverNewline) {
1320*ccdc9c3eSSadaf Ebrahimi   RE2::Options opt;
1321*ccdc9c3eSSadaf Ebrahimi   opt.set_never_nl(true);
1322*ccdc9c3eSSadaf Ebrahimi   for (int i = 0; i < arraysize(never_tests); i++) {
1323*ccdc9c3eSSadaf Ebrahimi     const NeverTest& t = never_tests[i];
1324*ccdc9c3eSSadaf Ebrahimi     RE2 re(t.regexp, opt);
    // PartialMatch is a static function that already receives `re` as an
    // argument, so it is spelled RE2::PartialMatch like every other call in
    // this file rather than being reached through the instance.
1325*ccdc9c3eSSadaf Ebrahimi     if (t.match == NULL) {
1326*ccdc9c3eSSadaf Ebrahimi       EXPECT_FALSE(RE2::PartialMatch(t.text, re));
1327*ccdc9c3eSSadaf Ebrahimi     } else {
1328*ccdc9c3eSSadaf Ebrahimi       StringPiece m;
1329*ccdc9c3eSSadaf Ebrahimi       EXPECT_TRUE(RE2::PartialMatch(t.text, re, &m));
1330*ccdc9c3eSSadaf Ebrahimi       EXPECT_EQ(m, t.match);
1331*ccdc9c3eSSadaf Ebrahimi     }
1332*ccdc9c3eSSadaf Ebrahimi   }
1333*ccdc9c3eSSadaf Ebrahimi }
1334*ccdc9c3eSSadaf Ebrahimi 
1335*ccdc9c3eSSadaf Ebrahimi // Check that dot_nl option works.
// Expectations, in order: with dot_nl set, "." matches "\n"; the in-pattern
// flag (?-s) takes that back, so "\n" no longer matches; and once never_nl
// is also set, "." does not match "\n" even though dot_nl is still on.
TEST(RE2,DotNL)1336*ccdc9c3eSSadaf Ebrahimi TEST(RE2, DotNL) {
1337*ccdc9c3eSSadaf Ebrahimi   RE2::Options opt;
1338*ccdc9c3eSSadaf Ebrahimi   opt.set_dot_nl(true);
1339*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::PartialMatch("\n", RE2(".", opt)));
1340*ccdc9c3eSSadaf Ebrahimi   EXPECT_FALSE(RE2::PartialMatch("\n", RE2("(?-s).", opt)));
1341*ccdc9c3eSSadaf Ebrahimi   opt.set_never_nl(true);
1342*ccdc9c3eSSadaf Ebrahimi   EXPECT_FALSE(RE2::PartialMatch("\n", RE2(".", opt)));
1343*ccdc9c3eSSadaf Ebrahimi }
1344*ccdc9c3eSSadaf Ebrahimi 
1345*ccdc9c3eSSadaf Ebrahimi // Check that there are no capturing groups in "never capture" mode.
// The pattern is written with two parenthesized groups, yet
// NumberOfCapturingGroups() must report 0 when never_capture is set.
TEST(RE2,NeverCapture)1346*ccdc9c3eSSadaf Ebrahimi TEST(RE2, NeverCapture) {
1347*ccdc9c3eSSadaf Ebrahimi   RE2::Options opt;
1348*ccdc9c3eSSadaf Ebrahimi   opt.set_never_capture(true);
1349*ccdc9c3eSSadaf Ebrahimi   RE2 re("(r)(e)", opt);
1350*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ(0, re.NumberOfCapturingGroups());
1351*ccdc9c3eSSadaf Ebrahimi }
1352*ccdc9c3eSSadaf Ebrahimi 
1353*ccdc9c3eSSadaf Ebrahimi // Bitstate bug was looking at submatch[0] even if nsubmatch == 0.
1354*ccdc9c3eSSadaf Ebrahimi // Triggered by a failed DFA search falling back to Bitstate when
1355*ccdc9c3eSSadaf Ebrahimi // using Match with a NULL submatch set.  Bitstate tried to read
1356*ccdc9c3eSSadaf Ebrahimi // the submatch[0] entry even if nsubmatch was 0.
TEST(RE2,BitstateCaptureBug)1357*ccdc9c3eSSadaf Ebrahimi TEST(RE2, BitstateCaptureBug) {
1358*ccdc9c3eSSadaf Ebrahimi   RE2::Options opt;
  // NOTE(review): the small max_mem value presumably exists to make the DFA
  // search fail and force the fallback described above -- confirm.
1359*ccdc9c3eSSadaf Ebrahimi   opt.set_max_mem(20000);
1360*ccdc9c3eSSadaf Ebrahimi   RE2 re("(_________$)", opt);
  // The text ends in 'x', so the '$'-anchored run of underscores cannot match.
1361*ccdc9c3eSSadaf Ebrahimi   StringPiece s = "xxxxxxxxxxxxxxxxxxxxxxxxxx_________x";
  // Unanchored search over the whole text with a NULL submatch array and a
  // submatch count of 0.
1362*ccdc9c3eSSadaf Ebrahimi   EXPECT_FALSE(re.Match(s, 0, s.size(), RE2::UNANCHORED, NULL, 0));
1363*ccdc9c3eSSadaf Ebrahimi }
1364*ccdc9c3eSSadaf Ebrahimi 
1365*ccdc9c3eSSadaf Ebrahimi // C++ version of bug 609710.
// Checks the \p{..} / \P{..} Unicode class escapes against an ASCII letter
// and three CJK characters, first one character at a time and then inside
// larger patterns with captures.
TEST(RE2,UnicodeClasses)1366*ccdc9c3eSSadaf Ebrahimi TEST(RE2, UnicodeClasses) {
1367*ccdc9c3eSSadaf Ebrahimi   const string str = "ABCDEFGHI譚永鋒";
1368*ccdc9c3eSSadaf Ebrahimi   string a, b, c;
1369*ccdc9c3eSSadaf Ebrahimi 
  // "A" is expected to be in L and Lu but not in Ll; each \P{..} form must
  // give the opposite answer.
1370*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::FullMatch("A", "\\p{L}"));
1371*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::FullMatch("A", "\\p{Lu}"));
1372*ccdc9c3eSSadaf Ebrahimi   EXPECT_FALSE(RE2::FullMatch("A", "\\p{Ll}"));
1373*ccdc9c3eSSadaf Ebrahimi   EXPECT_FALSE(RE2::FullMatch("A", "\\P{L}"));
1374*ccdc9c3eSSadaf Ebrahimi   EXPECT_FALSE(RE2::FullMatch("A", "\\P{Lu}"));
1375*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::FullMatch("A", "\\P{Ll}"));
1376*ccdc9c3eSSadaf Ebrahimi 
  // Each of the three CJK characters is expected to be in L but in neither
  // Lu nor Ll.
1377*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::FullMatch("譚", "\\p{L}"));
1378*ccdc9c3eSSadaf Ebrahimi   EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Lu}"));
1379*ccdc9c3eSSadaf Ebrahimi   EXPECT_FALSE(RE2::FullMatch("譚", "\\p{Ll}"));
1380*ccdc9c3eSSadaf Ebrahimi   EXPECT_FALSE(RE2::FullMatch("譚", "\\P{L}"));
1381*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Lu}"));
1382*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::FullMatch("譚", "\\P{Ll}"));
1383*ccdc9c3eSSadaf Ebrahimi 
1384*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::FullMatch("永", "\\p{L}"));
1385*ccdc9c3eSSadaf Ebrahimi   EXPECT_FALSE(RE2::FullMatch("永", "\\p{Lu}"));
1386*ccdc9c3eSSadaf Ebrahimi   EXPECT_FALSE(RE2::FullMatch("永", "\\p{Ll}"));
1387*ccdc9c3eSSadaf Ebrahimi   EXPECT_FALSE(RE2::FullMatch("永", "\\P{L}"));
1388*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::FullMatch("永", "\\P{Lu}"));
1389*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::FullMatch("永", "\\P{Ll}"));
1390*ccdc9c3eSSadaf Ebrahimi 
1391*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::FullMatch("鋒", "\\p{L}"));
1392*ccdc9c3eSSadaf Ebrahimi   EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Lu}"));
1393*ccdc9c3eSSadaf Ebrahimi   EXPECT_FALSE(RE2::FullMatch("鋒", "\\p{Ll}"));
1394*ccdc9c3eSSadaf Ebrahimi   EXPECT_FALSE(RE2::FullMatch("鋒", "\\P{L}"));
1395*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Lu}"));
1396*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::FullMatch("鋒", "\\P{Ll}"));
1397*ccdc9c3eSSadaf Ebrahimi 
  // With non-greedy gaps between the groups, the three captures are the
  // first three characters of str.
1398*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?(.).*?(.)", &a, &b, &c));
1399*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ("A", a);
1400*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ("B", b);
1401*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ("C", c);
1402*ccdc9c3eSSadaf Ebrahimi 
1403*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{L}]).*?(.)", &a, &b, &c));
1404*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ("A", a);
1405*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ("B", b);
1406*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ("C", c);
1407*ccdc9c3eSSadaf Ebrahimi 
  // Every character of str is a letter, so a non-letter must not be found.
1408*ccdc9c3eSSadaf Ebrahimi   EXPECT_FALSE(RE2::PartialMatch(str, "\\P{L}"));
1409*ccdc9c3eSSadaf Ebrahimi 
1410*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::PartialMatch(str, "(.).*?([\\p{Lu}]).*?(.)", &a, &b, &c));
1411*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ("A", a);
1412*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ("B", b);
1413*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ("C", c);
1414*ccdc9c3eSSadaf Ebrahimi 
1415*ccdc9c3eSSadaf Ebrahimi   EXPECT_FALSE(RE2::PartialMatch(str, "[^\\p{Lu}\\p{Lo}]"));
1416*ccdc9c3eSSadaf Ebrahimi 
  // The leading greedy ".*" pushes the three captures to the last three
  // characters of str.
1417*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::PartialMatch(str, ".*(.).*?([\\p{Lu}\\p{Lo}]).*?(.)", &a, &b, &c));
1418*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ("譚", a);
1419*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ("永", b);
1420*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ("鋒", c);
1421*ccdc9c3eSSadaf Ebrahimi }
1422*ccdc9c3eSSadaf Ebrahimi 
// Two function-local static LazyRE2 objects are brace-initialized, one with
// a pattern only and one with a pattern plus RE2::Latin1. Accessed through
// operator->, each must report its own pattern; the first must report UTF-8
// encoding and the second Latin-1.
TEST(RE2,LazyRE2)1423*ccdc9c3eSSadaf Ebrahimi TEST(RE2, LazyRE2) {
1424*ccdc9c3eSSadaf Ebrahimi   // Test with and without options.
1425*ccdc9c3eSSadaf Ebrahimi   static LazyRE2 a = {"a"};
1426*ccdc9c3eSSadaf Ebrahimi   static LazyRE2 b = {"b", RE2::Latin1};
1427*ccdc9c3eSSadaf Ebrahimi 
1428*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ("a", a->pattern());
1429*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ(RE2::Options::EncodingUTF8, a->options().encoding());
1430*ccdc9c3eSSadaf Ebrahimi 
1431*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ("b", b->pattern());
1432*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ(RE2::Options::EncodingLatin1, b->options().encoding());
1433*ccdc9c3eSSadaf Ebrahimi }
1434*ccdc9c3eSSadaf Ebrahimi 
1435*ccdc9c3eSSadaf Ebrahimi // Bug reported by saito. 2009/02/17
// A default-constructed StringPiece ("null") and a StringPiece built from ""
// ("empty") must both be fully matched by ".*".
TEST(RE2,NullVsEmptyString)1436*ccdc9c3eSSadaf Ebrahimi TEST(RE2, NullVsEmptyString) {
1437*ccdc9c3eSSadaf Ebrahimi   RE2 re(".*");
1438*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(re.ok());
1439*ccdc9c3eSSadaf Ebrahimi 
1440*ccdc9c3eSSadaf Ebrahimi   StringPiece null;
1441*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::FullMatch(null, re));
1442*ccdc9c3eSSadaf Ebrahimi 
1443*ccdc9c3eSSadaf Ebrahimi   StringPiece empty("");
1444*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::FullMatch(empty, re));
1445*ccdc9c3eSSadaf Ebrahimi }
1446*ccdc9c3eSSadaf Ebrahimi 
1447*ccdc9c3eSSadaf Ebrahimi // Similar to the previous test, check that the null string and the empty
1448*ccdc9c3eSSadaf Ebrahimi // string both match, but also that the null string can only provide null
1449*ccdc9c3eSSadaf Ebrahimi // submatches whereas the empty string can also provide empty submatches.
TEST(RE2,NullVsEmptyStringSubmatches)1450*ccdc9c3eSSadaf Ebrahimi TEST(RE2, NullVsEmptyStringSubmatches) {
1451*ccdc9c3eSSadaf Ebrahimi   RE2 re("()|(foo)");
1452*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(re.ok());
1453*ccdc9c3eSSadaf Ebrahimi 
1454*ccdc9c3eSSadaf Ebrahimi   // matches[0] is overall match, [1] is (), [2] is (foo), [3] is nonexistent.
1455*ccdc9c3eSSadaf Ebrahimi   StringPiece matches[4];
1456*ccdc9c3eSSadaf Ebrahimi 
  // Pre-fill every slot with a non-empty value so the checks below can tell
  // that Match() really overwrote each entry.
1457*ccdc9c3eSSadaf Ebrahimi   for (int i = 0; i < arraysize(matches); i++)
1458*ccdc9c3eSSadaf Ebrahimi     matches[i] = "bar";
1459*ccdc9c3eSSadaf Ebrahimi 
1460*ccdc9c3eSSadaf Ebrahimi   StringPiece null;
1461*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(re.Match(null, 0, null.size(), RE2::UNANCHORED,
1462*ccdc9c3eSSadaf Ebrahimi                        matches, arraysize(matches)));
1463*ccdc9c3eSSadaf Ebrahimi   for (int i = 0; i < arraysize(matches); i++) {
1464*ccdc9c3eSSadaf Ebrahimi     EXPECT_TRUE(matches[i] == StringPiece());
1465*ccdc9c3eSSadaf Ebrahimi     EXPECT_TRUE(matches[i].data() == NULL);  // always null
1466*ccdc9c3eSSadaf Ebrahimi     EXPECT_TRUE(matches[i] == "");
1467*ccdc9c3eSSadaf Ebrahimi   }
1468*ccdc9c3eSSadaf Ebrahimi 
  // Reset the slots before repeating the search on the empty string.
1469*ccdc9c3eSSadaf Ebrahimi   for (int i = 0; i < arraysize(matches); i++)
1470*ccdc9c3eSSadaf Ebrahimi     matches[i] = "bar";
1471*ccdc9c3eSSadaf Ebrahimi 
1472*ccdc9c3eSSadaf Ebrahimi   StringPiece empty("");
1473*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(re.Match(empty, 0, empty.size(), RE2::UNANCHORED,
1474*ccdc9c3eSSadaf Ebrahimi                        matches, arraysize(matches)));
  // Slots 0 and 1 (overall match and "()") must be empty with non-null
  // data(); slots 2 and 3 (the unused "(foo)" group and the nonexistent
  // group) must have null data().
1475*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(matches[0] == StringPiece());
1476*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(matches[0].data() != NULL);  // empty, not null
1477*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(matches[0] == "");
1478*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(matches[1] == StringPiece());
1479*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(matches[1].data() != NULL);  // empty, not null
1480*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(matches[1] == "");
1481*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(matches[2] == StringPiece());
1482*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(matches[2].data() == NULL);
1483*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(matches[2] == "");
1484*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(matches[3] == StringPiece());
1485*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(matches[3].data() == NULL);
1486*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(matches[3] == "");
1487*ccdc9c3eSSadaf Ebrahimi }
1488*ccdc9c3eSSadaf Ebrahimi 
1489*ccdc9c3eSSadaf Ebrahimi // Issue 1816809
// Regression check: Consume() with this deeply nested capturing pattern and
// a single string output argument must succeed on "llx-3;llx4".
TEST(RE2,Bug1816809)1490*ccdc9c3eSSadaf Ebrahimi TEST(RE2, Bug1816809) {
1491*ccdc9c3eSSadaf Ebrahimi   RE2 re("(((((llx((-3)|(4)))(;(llx((-3)|(4))))*))))");
1492*ccdc9c3eSSadaf Ebrahimi   StringPiece piece("llx-3;llx4");
1493*ccdc9c3eSSadaf Ebrahimi   string x;
1494*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(RE2::Consume(&piece, re, &x));
1495*ccdc9c3eSSadaf Ebrahimi }
1496*ccdc9c3eSSadaf Ebrahimi 
1497*ccdc9c3eSSadaf Ebrahimi // Issue 3061120
// "x", "k" and "s" are word characters, so the case-insensitive non-word
// class must not match any of them.
// NOTE(review): "kelvin" and "latin long s" presumably refer to U+212A
// KELVIN SIGN and U+017F LATIN SMALL LETTER LONG S, which case-fold to 'k'
// and 's' respectively -- confirm against the original issue.
TEST(RE2,Bug3061120)1498*ccdc9c3eSSadaf Ebrahimi TEST(RE2, Bug3061120) {
1499*ccdc9c3eSSadaf Ebrahimi   RE2 re("(?i)\\W");
1500*ccdc9c3eSSadaf Ebrahimi   EXPECT_FALSE(RE2::PartialMatch("x", re));  // always worked
1501*ccdc9c3eSSadaf Ebrahimi   EXPECT_FALSE(RE2::PartialMatch("k", re));  // broke because of kelvin
1502*ccdc9c3eSSadaf Ebrahimi   EXPECT_FALSE(RE2::PartialMatch("s", re));  // broke because of latin long s
1503*ccdc9c3eSSadaf Ebrahimi }
1504*ccdc9c3eSSadaf Ebrahimi 
// CapturingGroupNames() must return a map from group index to group name
// that holds only the named groups. The name G2 is used by two different
// groups (3 and 6), and both entries are expected.
TEST(RE2,CapturingGroupNames)1505*ccdc9c3eSSadaf Ebrahimi TEST(RE2, CapturingGroupNames) {
1506*ccdc9c3eSSadaf Ebrahimi   // Opening parentheses annotated with group IDs:
1507*ccdc9c3eSSadaf Ebrahimi   //      12    3        45   6         7
1508*ccdc9c3eSSadaf Ebrahimi   RE2 re("((abc)(?P<G2>)|((e+)(?P<G2>.*)(?P<G1>u+)))");
1509*ccdc9c3eSSadaf Ebrahimi   EXPECT_TRUE(re.ok());
1510*ccdc9c3eSSadaf Ebrahimi   const std::map<int, string>& have = re.CapturingGroupNames();
1511*ccdc9c3eSSadaf Ebrahimi   std::map<int, string> want;
1512*ccdc9c3eSSadaf Ebrahimi   want[3] = "G2";
1513*ccdc9c3eSSadaf Ebrahimi   want[6] = "G2";
1514*ccdc9c3eSSadaf Ebrahimi   want[7] = "G1";
1515*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ(want, have);
1516*ccdc9c3eSSadaf Ebrahimi }
1517*ccdc9c3eSSadaf Ebrahimi 
// Checks that Regexp()->ToString() keeps the anchors of an anchored pattern.
// With RE2::POSIX the ^ and $ are printed as written; with default options
// they are printed in the wrapped forms (?-m:^) and (?-m:$).
TEST(RE2,RegexpToStringLossOfAnchor)1518*ccdc9c3eSSadaf Ebrahimi TEST(RE2, RegexpToStringLossOfAnchor) {
1519*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ(RE2("^[a-c]at", RE2::POSIX).Regexp()->ToString(), "^[a-c]at");
1520*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ(RE2("^[a-c]at").Regexp()->ToString(), "(?-m:^)[a-c]at");
1521*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ(RE2("ca[t-z]$", RE2::POSIX).Regexp()->ToString(), "ca[t-z]$");
1522*ccdc9c3eSSadaf Ebrahimi   EXPECT_EQ(RE2("ca[t-z]$").Regexp()->ToString(), "ca[t-z](?-m:$)");
1523*ccdc9c3eSSadaf Ebrahimi }
1524*ccdc9c3eSSadaf Ebrahimi 
1525*ccdc9c3eSSadaf Ebrahimi // Issue 10131674
// In Latin-1 mode a pattern containing octal escapes above 255 must be
// rejected, and matching with the rejected regexp must return false.
TEST(RE2,Bug10131674)1526*ccdc9c3eSSadaf Ebrahimi TEST(RE2, Bug10131674) {
1527*ccdc9c3eSSadaf Ebrahimi   // Some of these escapes describe values that do not fit in a byte.
  // (octal 440 is 288 and octal 656 is 430, both above 255)
1528*ccdc9c3eSSadaf Ebrahimi   RE2 re("\\140\\440\\174\\271\\150\\656\\106\\201\\004\\332", RE2::Latin1);
1529*ccdc9c3eSSadaf Ebrahimi   EXPECT_FALSE(re.ok());
1530*ccdc9c3eSSadaf Ebrahimi   EXPECT_FALSE(RE2::FullMatch("hello world", re));
1531*ccdc9c3eSSadaf Ebrahimi }
1532*ccdc9c3eSSadaf Ebrahimi 
// Regression input found by fuzzing. The same NUL-terminated byte sequence
// is used both as the pattern and as the text to search.
TEST(RE2,Bug18391750)1533*ccdc9c3eSSadaf Ebrahimi TEST(RE2, Bug18391750) {
1534*ccdc9c3eSSadaf Ebrahimi   // Stray write past end of match_ in nfa.cc, caught by fuzzing + address sanitizer.
1535*ccdc9c3eSSadaf Ebrahimi   const char t[] = {
1536*ccdc9c3eSSadaf Ebrahimi       (char)0x28, (char)0x28, (char)0xfc, (char)0xfc, (char)0x08, (char)0x08,
1537*ccdc9c3eSSadaf Ebrahimi       (char)0x26, (char)0x26, (char)0x28, (char)0xc2, (char)0x9b, (char)0xc5,
1538*ccdc9c3eSSadaf Ebrahimi       (char)0xc5, (char)0xd4, (char)0x8f, (char)0x8f, (char)0x69, (char)0x69,
1539*ccdc9c3eSSadaf Ebrahimi       (char)0xe7, (char)0x29, (char)0x7b, (char)0x37, (char)0x31, (char)0x31,
1540*ccdc9c3eSSadaf Ebrahimi       (char)0x7d, (char)0xae, (char)0x7c, (char)0x7c, (char)0xf3, (char)0x29,
1541*ccdc9c3eSSadaf Ebrahimi       (char)0xae, (char)0xae, (char)0x2e, (char)0x2a, (char)0x29, (char)0x00,
1542*ccdc9c3eSSadaf Ebrahimi   };
1543*ccdc9c3eSSadaf Ebrahimi   RE2::Options opt;
1544*ccdc9c3eSSadaf Ebrahimi   opt.set_encoding(RE2::Options::EncodingLatin1);
1545*ccdc9c3eSSadaf Ebrahimi   opt.set_longest_match(true);
1546*ccdc9c3eSSadaf Ebrahimi   opt.set_dot_nl(true);
1547*ccdc9c3eSSadaf Ebrahimi   opt.set_case_sensitive(false);
1548*ccdc9c3eSSadaf Ebrahimi   RE2 re(t, opt);
1549*ccdc9c3eSSadaf Ebrahimi   ASSERT_TRUE(re.ok());
  // The match result is deliberately not asserted; the call only has to run
  // to completion (the original failure was a stray memory write).
1550*ccdc9c3eSSadaf Ebrahimi   RE2::PartialMatch(t, re);
1551*ccdc9c3eSSadaf Ebrahimi }
1552*ccdc9c3eSSadaf Ebrahimi 
// Regression input whose character class contains the four-byte sequence
// f5 87 87 90. Decoded as UTF-8 it would denote 0x1471D0, which is above the
// Unicode maximum of 0x10FFFF, so the pattern must be rejected.
TEST(RE2,Bug18458852)1553*ccdc9c3eSSadaf Ebrahimi TEST(RE2, Bug18458852) {
1554*ccdc9c3eSSadaf Ebrahimi   // Bug in parser accepting invalid (too large) rune,
1555*ccdc9c3eSSadaf Ebrahimi   // causing compiler to fail in DCHECK in UTF-8
1556*ccdc9c3eSSadaf Ebrahimi   // character class code.
1557*ccdc9c3eSSadaf Ebrahimi   const char b[] = {
1558*ccdc9c3eSSadaf Ebrahimi       (char)0x28, (char)0x05, (char)0x05, (char)0x41, (char)0x41, (char)0x28,
1559*ccdc9c3eSSadaf Ebrahimi       (char)0x24, (char)0x5b, (char)0x5e, (char)0xf5, (char)0x87, (char)0x87,
1560*ccdc9c3eSSadaf Ebrahimi       (char)0x90, (char)0x29, (char)0x5d, (char)0x29, (char)0x29, (char)0x00,
1561*ccdc9c3eSSadaf Ebrahimi   };
1562*ccdc9c3eSSadaf Ebrahimi   RE2 re(b);
1563*ccdc9c3eSSadaf Ebrahimi   ASSERT_FALSE(re.ok());
1564*ccdc9c3eSSadaf Ebrahimi }
1565*ccdc9c3eSSadaf Ebrahimi 
TEST(RE2, Bug18523943) {
  // Regression test for BitState: handling of kFailInst used to fail the
  // match entirely.

  // NUL-terminated text to search.
  const char text[] = {
      static_cast<char>(0x29), static_cast<char>(0x29), static_cast<char>(0x24),
      static_cast<char>(0x00),
  };
  // NUL-terminated pattern to compile.
  const char pattern[] = {
      static_cast<char>(0x28), static_cast<char>(0x0a), static_cast<char>(0x2a),
      static_cast<char>(0x2a), static_cast<char>(0x29), static_cast<char>(0x00),
  };

  RE2::Options options;
  options.set_log_errors(false);
  options.set_encoding(RE2::Options::EncodingLatin1);
  options.set_posix_syntax(true);
  options.set_longest_match(true);
  options.set_literal(false);
  options.set_never_nl(true);

  RE2 re(pattern, options);
  ASSERT_TRUE(re.ok());

  string captured;
  ASSERT_TRUE(RE2::PartialMatch(text, re, &captured));
}
1588*ccdc9c3eSSadaf Ebrahimi 
TEST(RE2, Bug21371806) {
  // Regression test: the parser used to accept Unicode groups in Latin-1
  // mode, which made the compiler fail a DCHECK in prog.cc.
  RE2::Options latin1_options;
  latin1_options.set_encoding(RE2::Options::EncodingLatin1);

  RE2 re("g\\p{Zl}]", latin1_options);
  ASSERT_TRUE(re.ok());
}
1599*ccdc9c3eSSadaf Ebrahimi 
TEST(RE2, Bug26356109) {
  // Regression test for a parser bug in the factoring of common prefixes
  // of alternations.
  //
  // The pattern below used to be factored to "a\\C*?[bc]". As a result the
  // automaton consumed "ab" and then stopped (when unanchored), although
  // first-match semantics require it to consume all of "abc".
  RE2 re("a\\C*?c|a\\C*?b");
  ASSERT_TRUE(re.ok());

  string s = "abc";
  const size_t end = s.size();
  StringPiece matched;

  // Unanchored search must yield the whole input.
  ASSERT_TRUE(re.Match(s, 0, end, RE2::UNANCHORED, &matched, 1));
  ASSERT_EQ(matched, s)
      << " (UNANCHORED) got m='" << matched << "', want '" << s << "'";

  // The same holds when anchored at both ends.
  ASSERT_TRUE(re.Match(s, 0, end, RE2::ANCHOR_BOTH, &matched, 1));
  ASSERT_EQ(matched, s)
      << " (ANCHOR_BOTH) got m='" << matched << "', want '" << s << "'";
}
1618*ccdc9c3eSSadaf Ebrahimi 
TEST(RE2, Issue104) {
  // Regression test: after an empty match, RE2::GlobalReplace used to
  // advance by exactly one byte, clobbering any rune longer than one byte.

  {
    // One-byte runes.
    string ascii = "bc";
    ASSERT_EQ(3, RE2::GlobalReplace(&ascii, "a*", "d"));
    ASSERT_EQ("dbdcd", ascii);
  }

  {
    // Two-byte runes.
    string two_byte = "ąć";
    ASSERT_EQ(3, RE2::GlobalReplace(&two_byte, "Ć*", "Ĉ"));
    ASSERT_EQ("ĈąĈćĈ", two_byte);
  }

  {
    // Three-byte runes.
    string three_byte = "人类";
    ASSERT_EQ(3, RE2::GlobalReplace(&three_byte, "大*", "小"));
    ASSERT_EQ("小人小类小", three_byte);
  }
}
1635*ccdc9c3eSSadaf Ebrahimi 
1636*ccdc9c3eSSadaf Ebrahimi }  // namespace re2
1637