xref: /aosp_15_r20/external/regex-re2/re2/testing/simplify_test.cc (revision ccdc9c3e24c519bfa4832a66aa2e83a52c19f295)
1*ccdc9c3eSSadaf Ebrahimi // Copyright 2006 The RE2 Authors.  All Rights Reserved.
2*ccdc9c3eSSadaf Ebrahimi // Use of this source code is governed by a BSD-style
3*ccdc9c3eSSadaf Ebrahimi // license that can be found in the LICENSE file.
4*ccdc9c3eSSadaf Ebrahimi 
5*ccdc9c3eSSadaf Ebrahimi // Test simplify.cc.
6*ccdc9c3eSSadaf Ebrahimi 
7*ccdc9c3eSSadaf Ebrahimi #include <string.h>
8*ccdc9c3eSSadaf Ebrahimi #include <string>
9*ccdc9c3eSSadaf Ebrahimi 
10*ccdc9c3eSSadaf Ebrahimi #include "util/test.h"
11*ccdc9c3eSSadaf Ebrahimi #include "util/logging.h"
12*ccdc9c3eSSadaf Ebrahimi #include "re2/regexp.h"
13*ccdc9c3eSSadaf Ebrahimi 
14*ccdc9c3eSSadaf Ebrahimi namespace re2 {
15*ccdc9c3eSSadaf Ebrahimi 
// One row of the simplification test table: an input pattern together with
// the text its simplified form is expected to print as.
struct Test {
  // Pattern source handed to Regexp::Parse() by the test.
  const char* regexp;
  // Expected result of ToString() on the regexp returned by Simplify().
  const char* simplified;
};
20*ccdc9c3eSSadaf Ebrahimi 
// Table of { input regexp, expected simplified text } pairs that drives the
// SimpleRegexps test.  Each input is parsed, simplified, and printed with
// ToString(); the printed text must equal the second string.  When the two
// strings of an entry are identical, the test additionally requires that
// Simplify() handed back the very same Regexp object it was called on.
static Test tests[] = {
  // Already-simple constructs
  { "a", "a" },
  { "ab", "ab" },
  { "a|b", "[a-b]" },
  { "ab|cd", "ab|cd" },
  { "(ab)*", "(ab)*" },
  { "(ab)+", "(ab)+" },
  { "(ab)?", "(ab)?" },
  { ".", "." },
  { "^", "^" },
  { "$", "$" },
  { "[ac]", "[ac]" },
  { "[^ac]", "[^ac]" },

  // Posix character classes
  { "[[:alnum:]]", "[0-9A-Za-z]" },
  { "[[:alpha:]]", "[A-Za-z]" },
  { "[[:blank:]]", "[\\t ]" },
  { "[[:cntrl:]]", "[\\x00-\\x1f\\x7f]" },
  { "[[:digit:]]", "[0-9]" },
  { "[[:graph:]]", "[!-~]" },
  { "[[:lower:]]", "[a-z]" },
  { "[[:print:]]", "[ -~]" },
  { "[[:punct:]]", "[!-/:-@\\[-`{-~]" },
  { "[[:space:]]" , "[\\t-\\r ]" },
  { "[[:upper:]]", "[A-Z]" },
  { "[[:xdigit:]]", "[0-9A-Fa-f]" },

  // Perl character classes
  { "\\d", "[0-9]" },
  { "\\s", "[\\t-\\n\\f-\\r ]" },
  { "\\w", "[0-9A-Z_a-z]" },
  { "\\D", "[^0-9]" },
  { "\\S", "[^\\t-\\n\\f-\\r ]" },
  { "\\W", "[^0-9A-Z_a-z]" },
  { "[\\d]", "[0-9]" },
  { "[\\s]", "[\\t-\\n\\f-\\r ]" },
  { "[\\w]", "[0-9A-Z_a-z]" },
  { "[\\D]", "[^0-9]" },
  { "[\\S]", "[^\\t-\\n\\f-\\r ]" },
  { "[\\W]", "[^0-9A-Z_a-z]" },

  // Posix repetitions
  { "a{1}", "a" },
  { "a{2}", "aa" },
  { "a{5}", "aaaaa" },
  { "a{0,1}", "a?" },
  // The next three are illegible because Simplify inserts (?:)
  // parens instead of () parens to avoid creating extra
  // captured subexpressions.  The comments show a version with fewer parens.
  // The three entries after those repeat the same bounds on a bare literal.
  { "(a){0,2}",                   "(?:(a)(a)?)?"     },  //       (aa?)?
  { "(a){0,4}",       "(?:(a)(?:(a)(?:(a)(a)?)?)?)?" },  //   (a(a(aa?)?)?)?
  { "(a){2,6}", "(a)(a)(?:(a)(?:(a)(?:(a)(a)?)?)?)?" },  // aa(a(a(aa?)?)?)?
  { "a{0,2}",           "(?:aa?)?"     },  //       (aa?)?
  { "a{0,4}",   "(?:a(?:a(?:aa?)?)?)?" },  //   (a(a(aa?)?)?)?
  { "a{2,6}", "aa(?:a(?:a(?:aa?)?)?)?" },  // aa(a(a(aa?)?)?)?
  { "a{0,}", "a*" },
  { "a{1,}", "a+" },
  { "a{2,}", "aa+" },
  { "a{5,}", "aaaaa+" },

  // Test that operators simplify their arguments.
  // (Simplify used to not simplify arguments to a {} repeat.)
  { "(?:a{1,}){1,}", "a+" },
  { "(a{1,}b{1,})", "(a+b+)" },
  { "a{1,}|b{1,}", "a+|b+" },
  { "(?:a{1,})*", "(?:a+)*" },
  { "(?:a{1,})+", "a+" },
  { "(?:a{1,})?", "(?:a+)?" },
  { "a{0}", "" },

  // Character class simplification
  { "[ab]", "[a-b]" },
  { "[a-za-za-z]", "[a-z]" },
  { "[A-Za-zA-Za-z]", "[A-Za-z]" },
  { "[ABCDEFGH]", "[A-H]" },
  { "[AB-CD-EF-GH]", "[A-H]" },
  { "[W-ZP-XE-R]", "[E-Z]" },
  { "[a-ee-gg-m]", "[a-m]" },
  { "[a-ea-ha-m]", "[a-m]" },
  { "[a-ma-ha-e]", "[a-m]" },
  { "[a-zA-Z0-9 -~]", "[ -~]" },

  // Empty character classes
  { "[^[:cntrl:][:^cntrl:]]", "[^\\x00-\\x{10ffff}]" },

  // Full character classes
  { "[[:cntrl:][:^cntrl:]]", "." },

  // Unicode case folding.
  { "(?i)A", "[Aa]" },
  { "(?i)a", "[Aa]" },
  { "(?i)K", "[Kk\\x{212a}]" },
  { "(?i)k", "[Kk\\x{212a}]" },
  { "(?i)\\x{212a}", "[Kk\\x{212a}]" },
  { "(?i)[a-z]", "[A-Za-z\\x{17f}\\x{212a}]" },
  { "(?i)[\\x00-\\x{FFFD}]", "[\\x00-\\x{fffd}]" },
  { "(?i)[\\x00-\\x{10ffff}]", "." },

  // Empty string as a regular expression.
  // Empty string must be preserved inside parens in order
  // to make submatches work right, so these are less
  // interesting than they used to be.  ToString inserts
  // explicit (?:) in place of non-parenthesized empty strings,
  // to make them easier to spot for other parsers.
  { "(a|b|)", "([a-b]|(?:))" },
  { "(|)", "((?:)|(?:))" },
  { "a()", "a()" },
  { "(()|())", "(()|())" },
  { "(a|)", "(a|(?:))" },
  { "ab()cd()", "ab()cd()" },
  { "()", "()" },
  { "()*", "()*" },
  { "()+", "()+" },
  { "()?" , "()?" },
  { "(){0}", "" },
  { "(){1}", "()" },
  { "(){1,}", "()+" },
  { "(){0,2}", "(?:()()?)?" },

  // Test that coalescing occurs and that the resulting repeats are simplified.
  // Two-op combinations of *, +, ?, {n}, {n,} and {n,m} with a literal:
  { "a*a*", "a*" },
  { "a*a+", "a+" },
  { "a*a?", "a*" },
  { "a*a{2}", "aa+" },
  { "a*a{2,}", "aa+" },
  { "a*a{2,3}", "aa+" },
  { "a+a*", "a+" },
  { "a+a+", "aa+" },
  { "a+a?", "a+" },
  { "a+a{2}", "aaa+" },
  { "a+a{2,}", "aaa+" },
  { "a+a{2,3}", "aaa+" },
  { "a?a*", "a*" },
  { "a?a+", "a+" },
  { "a?a?", "(?:aa?)?" },
  { "a?a{2}", "aaa?" },
  { "a?a{2,}", "aa+" },
  { "a?a{2,3}", "aa(?:aa?)?" },
  { "a{2}a*", "aa+" },
  { "a{2}a+", "aaa+" },
  { "a{2}a?", "aaa?" },
  { "a{2}a{2}", "aaaa" },
  { "a{2}a{2,}", "aaaa+" },
  { "a{2}a{2,3}", "aaaaa?" },
  { "a{2,}a*", "aa+" },
  { "a{2,}a+", "aaa+" },
  { "a{2,}a?", "aa+" },
  { "a{2,}a{2}", "aaaa+" },
  { "a{2,}a{2,}", "aaaa+" },
  { "a{2,}a{2,3}", "aaaa+" },
  { "a{2,3}a*", "aa+" },
  { "a{2,3}a+", "aaa+" },
  { "a{2,3}a?", "aa(?:aa?)?" },
  { "a{2,3}a{2}", "aaaaa?" },
  { "a{2,3}a{2,}", "aaaa+" },
  { "a{2,3}a{2,3}", "aaaa(?:aa?)?" },
  // With a char class, any char and any byte:
  { "\\d*\\d*", "[0-9]*" },
  { ".*.*", ".*" },
  { "\\C*\\C*", "\\C*" },
  // FoldCase works, but must be consistent:
  { "(?i)A*a*", "[Aa]*" },
  { "(?i)a+A+", "[Aa][Aa]+" },
  { "(?i)A*(?-i)a*", "[Aa]*a*" },
  { "(?i)a+(?-i)A+", "[Aa]+A+" },
  // NonGreedy works, but must be consistent:
  { "a*?a*?", "a*?" },
  { "a+?a+?", "aa+?" },
  { "a*?a*", "a*?a*" },
  { "a+a+?", "a+a+?" },
  // The second element is the literal, char class, any char or any byte:
  { "a*a", "a+" },
  { "\\d*\\d", "[0-9]+" },
  { ".*.", ".+" },
  { "\\C*\\C", "\\C+" },
  // FoldCase works, but must be consistent:
  { "(?i)A*a", "[Aa]+" },
  { "(?i)a+A", "[Aa][Aa]+" },
  { "(?i)A*(?-i)a", "[Aa]*a" },
  { "(?i)a+(?-i)A", "[Aa]+A" },
  // The second element is a literal string that begins with the literal:
  { "a*aa", "aa+" },
  { "a*aab", "aa+b" },
  // FoldCase works, but must be consistent:
  { "(?i)a*aa", "[Aa][Aa]+" },
  { "(?i)a*aab", "[Aa][Aa]+[Bb]" },
  { "(?i)a*(?-i)aa", "[Aa]*aa" },
  { "(?i)a*(?-i)aab", "[Aa]*aab" },
  // Negative tests with mismatching ops:
  { "a*b*", "a*b*" },
  { "\\d*\\D*", "[0-9]*[^0-9]*" },
  { "a+b", "a+b" },
  { "\\d+\\D", "[0-9]+[^0-9]" },
  { "a?bb", "a?bb" },
  // Negative tests with capturing groups:
  { "(a*)a*", "(a*)a*" },
  { "a+(a)", "a+(a)" },
  { "(a?)(aa)", "(a?)(aa)" },
  // Just for fun:
  { "aa*aa+aa?aa{2}aaa{2,}aaa{2,3}a", "aaaaaaaaaaaaaaaa+" },

  // During coalescing, the child of the repeat changes, so we build a new
  // repeat. The new repeat must have the min and max of the old repeat.
  // Failure to copy them results in min=0 and max=0 -> empty match.
  { "(?:a*aab){2}", "aa+baa+b" },

  // During coalescing, the child of the capture changes, so we build a new
  // capture. The new capture must have the cap of the old capture.
  // Failure to copy it results in cap=0 -> ToString() logs a fatal error.
  { "(a*aab)", "(aa+b)" },

  // Test squashing of **, ++, ?? et cetera.
  { "(?:(?:a){0,}){0,}", "a*" },
  { "(?:(?:a){1,}){1,}", "a+" },
  { "(?:(?:a){0,1}){0,1}", "a?" },
  { "(?:(?:a){0,}){1,}", "a*" },
  { "(?:(?:a){0,}){0,1}", "a*" },
  { "(?:(?:a){1,}){0,}", "a*" },
  { "(?:(?:a){1,}){0,1}", "a*" },
  { "(?:(?:a){0,1}){0,}", "a*" },
  { "(?:(?:a){0,1}){1,}", "a*" },
};
246*ccdc9c3eSSadaf Ebrahimi 
// Walks the tests[] table: parses each pattern, simplifies it, and checks
// that the simplified regexp prints as the expected text.
TEST(TestSimplify, SimpleRegexps) {
  for (const Test& entry : tests) {
    RegexpStatus status;
    VLOG(1) << "Testing " << entry.regexp;

    // Parse with Perl-like syntax minus OneLine, plus MatchNL.
    Regexp* parsed = Regexp::Parse(
        entry.regexp,
        Regexp::MatchNL | (Regexp::LikePerl & ~Regexp::OneLine),
        &status);
    ASSERT_TRUE(parsed != NULL)
        << " " << entry.regexp << " " << status.Text();

    Regexp* simple = parsed->Simplify();
    ASSERT_TRUE(simple != NULL);

    // When the expected text equals the input text the pattern is already
    // simple, and Simplify() must hand back the same object rather than
    // a freshly allocated one.
    const bool already_simple = strcmp(entry.regexp, entry.simplified) == 0;
    if (already_simple) {
      ASSERT_TRUE(parsed == simple)
          << " " << entry.regexp
          << " " << parsed->ToString() << " " << simple->ToString();
    }

    EXPECT_EQ(entry.simplified, simple->ToString())
        << " " << entry.regexp << " " << simple->Dump();

    // Drop the reference held through each pointer.
    parsed->Decref();
    simple->Decref();
  }
}
272*ccdc9c3eSSadaf Ebrahimi 
273*ccdc9c3eSSadaf Ebrahimi }  // namespace re2
274