// Copyright 2006 The RE2 Authors.  All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
4*ccdc9c3eSSadaf Ebrahimi
// Test parse.cc, dump.cc, and tostring.cc.
6*ccdc9c3eSSadaf Ebrahimi
#include <string>
#include <vector>

#include "util/test.h"
#include "util/logging.h"
#include "re2/regexp.h"
12*ccdc9c3eSSadaf Ebrahimi
13*ccdc9c3eSSadaf Ebrahimi namespace re2 {
14*ccdc9c3eSSadaf Ebrahimi
// Sentinel that lets a table entry ask for "no parse flags at all".
//
// A Test whose flags field is zero is parsed with the caller's default
// flags, so zero itself cannot be used to request an empty flag set.
// Entries that want one set flags to TestZeroFlags instead; the test
// drivers mask this bit off before calling Regexp::Parse.
//
// In the past, we used 1<<30 here and zeroed the bit later, but that
// has undefined behaviour, so now we use an internal-only flag because
// otherwise we would have to introduce a new flag value just for this.
static const Regexp::ParseFlags TestZeroFlags = Regexp::WasDollar;
19*ccdc9c3eSSadaf Ebrahimi
// One parser test case: a regexp and the structure it must parse to.
struct Test {
  // Source text of the regular expression to parse.
  const char* regexp;
  // Expected output of Regexp::Dump() on the parsed regexp.
  const char* parse;
  // Parse flags for this entry.  Zero (which is what an omitted initializer
  // leaves here) means "use the default flags of the test that runs the
  // table"; TestZeroFlags means "parse with no flags".
  Regexp::ParseFlags flags;
};
25*ccdc9c3eSSadaf Ebrahimi
26*ccdc9c3eSSadaf Ebrahimi static Regexp::ParseFlags kTestFlags = Regexp::MatchNL |
27*ccdc9c3eSSadaf Ebrahimi Regexp::PerlX |
28*ccdc9c3eSSadaf Ebrahimi Regexp::PerlClasses |
29*ccdc9c3eSSadaf Ebrahimi Regexp::UnicodeGroups;
30*ccdc9c3eSSadaf Ebrahimi
31*ccdc9c3eSSadaf Ebrahimi static Test tests[] = {
32*ccdc9c3eSSadaf Ebrahimi // Base cases
33*ccdc9c3eSSadaf Ebrahimi { "a", "lit{a}" },
34*ccdc9c3eSSadaf Ebrahimi { "a.", "cat{lit{a}dot{}}" },
35*ccdc9c3eSSadaf Ebrahimi { "a.b", "cat{lit{a}dot{}lit{b}}" },
36*ccdc9c3eSSadaf Ebrahimi { "ab", "str{ab}" },
37*ccdc9c3eSSadaf Ebrahimi { "a.b.c", "cat{lit{a}dot{}lit{b}dot{}lit{c}}" },
38*ccdc9c3eSSadaf Ebrahimi { "abc", "str{abc}" },
39*ccdc9c3eSSadaf Ebrahimi { "a|^", "alt{lit{a}bol{}}" },
40*ccdc9c3eSSadaf Ebrahimi { "a|b", "cc{0x61-0x62}" },
41*ccdc9c3eSSadaf Ebrahimi { "(a)", "cap{lit{a}}" },
42*ccdc9c3eSSadaf Ebrahimi { "(a)|b", "alt{cap{lit{a}}lit{b}}" },
43*ccdc9c3eSSadaf Ebrahimi { "a*", "star{lit{a}}" },
44*ccdc9c3eSSadaf Ebrahimi { "a+", "plus{lit{a}}" },
45*ccdc9c3eSSadaf Ebrahimi { "a?", "que{lit{a}}" },
46*ccdc9c3eSSadaf Ebrahimi { "a{2}", "rep{2,2 lit{a}}" },
47*ccdc9c3eSSadaf Ebrahimi { "a{2,3}", "rep{2,3 lit{a}}" },
48*ccdc9c3eSSadaf Ebrahimi { "a{2,}", "rep{2,-1 lit{a}}" },
49*ccdc9c3eSSadaf Ebrahimi { "a*?", "nstar{lit{a}}" },
50*ccdc9c3eSSadaf Ebrahimi { "a+?", "nplus{lit{a}}" },
51*ccdc9c3eSSadaf Ebrahimi { "a??", "nque{lit{a}}" },
52*ccdc9c3eSSadaf Ebrahimi { "a{2}?", "nrep{2,2 lit{a}}" },
53*ccdc9c3eSSadaf Ebrahimi { "a{2,3}?", "nrep{2,3 lit{a}}" },
54*ccdc9c3eSSadaf Ebrahimi { "a{2,}?", "nrep{2,-1 lit{a}}" },
55*ccdc9c3eSSadaf Ebrahimi { "", "emp{}" },
56*ccdc9c3eSSadaf Ebrahimi { "|", "alt{emp{}emp{}}" },
57*ccdc9c3eSSadaf Ebrahimi { "|x|", "alt{emp{}lit{x}emp{}}" },
58*ccdc9c3eSSadaf Ebrahimi { ".", "dot{}" },
59*ccdc9c3eSSadaf Ebrahimi { "^", "bol{}" },
60*ccdc9c3eSSadaf Ebrahimi { "$", "eol{}" },
61*ccdc9c3eSSadaf Ebrahimi { "\\|", "lit{|}" },
62*ccdc9c3eSSadaf Ebrahimi { "\\(", "lit{(}" },
63*ccdc9c3eSSadaf Ebrahimi { "\\)", "lit{)}" },
64*ccdc9c3eSSadaf Ebrahimi { "\\*", "lit{*}" },
65*ccdc9c3eSSadaf Ebrahimi { "\\+", "lit{+}" },
66*ccdc9c3eSSadaf Ebrahimi { "\\?", "lit{?}" },
67*ccdc9c3eSSadaf Ebrahimi { "{", "lit{{}" },
68*ccdc9c3eSSadaf Ebrahimi { "}", "lit{}}" },
69*ccdc9c3eSSadaf Ebrahimi { "\\.", "lit{.}" },
70*ccdc9c3eSSadaf Ebrahimi { "\\^", "lit{^}" },
71*ccdc9c3eSSadaf Ebrahimi { "\\$", "lit{$}" },
72*ccdc9c3eSSadaf Ebrahimi { "\\\\", "lit{\\}" },
73*ccdc9c3eSSadaf Ebrahimi { "[ace]", "cc{0x61 0x63 0x65}" },
74*ccdc9c3eSSadaf Ebrahimi { "[abc]", "cc{0x61-0x63}" },
75*ccdc9c3eSSadaf Ebrahimi { "[a-z]", "cc{0x61-0x7a}" },
76*ccdc9c3eSSadaf Ebrahimi { "[a]", "lit{a}" },
77*ccdc9c3eSSadaf Ebrahimi { "\\-", "lit{-}" },
78*ccdc9c3eSSadaf Ebrahimi { "-", "lit{-}" },
79*ccdc9c3eSSadaf Ebrahimi { "\\_", "lit{_}" },
80*ccdc9c3eSSadaf Ebrahimi
81*ccdc9c3eSSadaf Ebrahimi // Posix and Perl extensions
82*ccdc9c3eSSadaf Ebrahimi { "[[:lower:]]", "cc{0x61-0x7a}" },
83*ccdc9c3eSSadaf Ebrahimi { "[a-z]", "cc{0x61-0x7a}" },
84*ccdc9c3eSSadaf Ebrahimi { "[^[:lower:]]", "cc{0-0x60 0x7b-0x10ffff}" },
85*ccdc9c3eSSadaf Ebrahimi { "[[:^lower:]]", "cc{0-0x60 0x7b-0x10ffff}" },
86*ccdc9c3eSSadaf Ebrahimi { "(?i)[[:lower:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" },
87*ccdc9c3eSSadaf Ebrahimi { "(?i)[a-z]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" },
88*ccdc9c3eSSadaf Ebrahimi { "(?i)[^[:lower:]]", "cc{0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" },
89*ccdc9c3eSSadaf Ebrahimi { "(?i)[[:^lower:]]", "cc{0-0x40 0x5b-0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" },
90*ccdc9c3eSSadaf Ebrahimi { "\\d", "cc{0x30-0x39}" },
91*ccdc9c3eSSadaf Ebrahimi { "\\D", "cc{0-0x2f 0x3a-0x10ffff}" },
92*ccdc9c3eSSadaf Ebrahimi { "\\s", "cc{0x9-0xa 0xc-0xd 0x20}" },
93*ccdc9c3eSSadaf Ebrahimi { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}" },
94*ccdc9c3eSSadaf Ebrahimi { "\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a}" },
95*ccdc9c3eSSadaf Ebrahimi { "\\W", "cc{0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x10ffff}" },
96*ccdc9c3eSSadaf Ebrahimi { "(?i)\\w", "cc{0x30-0x39 0x41-0x5a 0x5f 0x61-0x7a 0x17f 0x212a}" },
97*ccdc9c3eSSadaf Ebrahimi { "(?i)\\W", "cc{0-0x2f 0x3a-0x40 0x5b-0x5e 0x60 0x7b-0x17e 0x180-0x2129 0x212b-0x10ffff}" },
98*ccdc9c3eSSadaf Ebrahimi { "[^\\\\]", "cc{0-0x5b 0x5d-0x10ffff}" },
99*ccdc9c3eSSadaf Ebrahimi { "\\C", "byte{}" },
100*ccdc9c3eSSadaf Ebrahimi
101*ccdc9c3eSSadaf Ebrahimi // Unicode, negatives, and a double negative.
102*ccdc9c3eSSadaf Ebrahimi { "\\p{Braille}", "cc{0x2800-0x28ff}" },
103*ccdc9c3eSSadaf Ebrahimi { "\\P{Braille}", "cc{0-0x27ff 0x2900-0x10ffff}" },
104*ccdc9c3eSSadaf Ebrahimi { "\\p{^Braille}", "cc{0-0x27ff 0x2900-0x10ffff}" },
105*ccdc9c3eSSadaf Ebrahimi { "\\P{^Braille}", "cc{0x2800-0x28ff}" },
106*ccdc9c3eSSadaf Ebrahimi
107*ccdc9c3eSSadaf Ebrahimi // More interesting regular expressions.
108*ccdc9c3eSSadaf Ebrahimi { "a{,2}", "str{a{,2}}" },
109*ccdc9c3eSSadaf Ebrahimi { "\\.\\^\\$\\\\", "str{.^$\\}" },
110*ccdc9c3eSSadaf Ebrahimi { "[a-zABC]", "cc{0x41-0x43 0x61-0x7a}" },
111*ccdc9c3eSSadaf Ebrahimi { "[^a]", "cc{0-0x60 0x62-0x10ffff}" },
112*ccdc9c3eSSadaf Ebrahimi { "[\xce\xb1-\xce\xb5\xe2\x98\xba]", "cc{0x3b1-0x3b5 0x263a}" }, // utf-8
113*ccdc9c3eSSadaf Ebrahimi { "a*{", "cat{star{lit{a}}lit{{}}" },
114*ccdc9c3eSSadaf Ebrahimi
115*ccdc9c3eSSadaf Ebrahimi // Test precedences
116*ccdc9c3eSSadaf Ebrahimi { "(?:ab)*", "star{str{ab}}" },
117*ccdc9c3eSSadaf Ebrahimi { "(ab)*", "star{cap{str{ab}}}" },
118*ccdc9c3eSSadaf Ebrahimi { "ab|cd", "alt{str{ab}str{cd}}" },
119*ccdc9c3eSSadaf Ebrahimi { "a(b|c)d", "cat{lit{a}cap{cc{0x62-0x63}}lit{d}}" },
120*ccdc9c3eSSadaf Ebrahimi
121*ccdc9c3eSSadaf Ebrahimi // Test squashing of **, ++, ?? et cetera.
122*ccdc9c3eSSadaf Ebrahimi { "(?:(?:a)*)*", "star{lit{a}}" },
123*ccdc9c3eSSadaf Ebrahimi { "(?:(?:a)+)+", "plus{lit{a}}" },
124*ccdc9c3eSSadaf Ebrahimi { "(?:(?:a)?)?", "que{lit{a}}" },
125*ccdc9c3eSSadaf Ebrahimi { "(?:(?:a)*)+", "star{lit{a}}" },
126*ccdc9c3eSSadaf Ebrahimi { "(?:(?:a)*)?", "star{lit{a}}" },
127*ccdc9c3eSSadaf Ebrahimi { "(?:(?:a)+)*", "star{lit{a}}" },
128*ccdc9c3eSSadaf Ebrahimi { "(?:(?:a)+)?", "star{lit{a}}" },
129*ccdc9c3eSSadaf Ebrahimi { "(?:(?:a)?)*", "star{lit{a}}" },
130*ccdc9c3eSSadaf Ebrahimi { "(?:(?:a)?)+", "star{lit{a}}" },
131*ccdc9c3eSSadaf Ebrahimi
132*ccdc9c3eSSadaf Ebrahimi // Test flattening.
133*ccdc9c3eSSadaf Ebrahimi { "(?:a)", "lit{a}" },
134*ccdc9c3eSSadaf Ebrahimi { "(?:ab)(?:cd)", "str{abcd}" },
135*ccdc9c3eSSadaf Ebrahimi { "(?:a|b)|(?:c|d)", "cc{0x61-0x64}" },
136*ccdc9c3eSSadaf Ebrahimi { "a|c", "cc{0x61 0x63}" },
137*ccdc9c3eSSadaf Ebrahimi { "a|[cd]", "cc{0x61 0x63-0x64}" },
138*ccdc9c3eSSadaf Ebrahimi { "a|.", "dot{}" },
139*ccdc9c3eSSadaf Ebrahimi { "[ab]|c", "cc{0x61-0x63}" },
140*ccdc9c3eSSadaf Ebrahimi { "[ab]|[cd]", "cc{0x61-0x64}" },
141*ccdc9c3eSSadaf Ebrahimi { "[ab]|.", "dot{}" },
142*ccdc9c3eSSadaf Ebrahimi { ".|c", "dot{}" },
143*ccdc9c3eSSadaf Ebrahimi { ".|[cd]", "dot{}" },
144*ccdc9c3eSSadaf Ebrahimi { ".|.", "dot{}" },
145*ccdc9c3eSSadaf Ebrahimi
146*ccdc9c3eSSadaf Ebrahimi // Test Perl quoted literals
147*ccdc9c3eSSadaf Ebrahimi { "\\Q+|*?{[\\E", "str{+|*?{[}" },
148*ccdc9c3eSSadaf Ebrahimi { "\\Q+\\E+", "plus{lit{+}}" },
149*ccdc9c3eSSadaf Ebrahimi { "\\Q\\\\E", "lit{\\}" },
150*ccdc9c3eSSadaf Ebrahimi { "\\Q\\\\\\E", "str{\\\\}" },
151*ccdc9c3eSSadaf Ebrahimi { "\\Qa\\E*", "star{lit{a}}" },
152*ccdc9c3eSSadaf Ebrahimi { "\\Qab\\E*", "cat{lit{a}star{lit{b}}}" },
153*ccdc9c3eSSadaf Ebrahimi { "\\Qabc\\E*", "cat{str{ab}star{lit{c}}}" },
154*ccdc9c3eSSadaf Ebrahimi
155*ccdc9c3eSSadaf Ebrahimi // Test Perl \A and \z
156*ccdc9c3eSSadaf Ebrahimi { "(?m)^", "bol{}" },
157*ccdc9c3eSSadaf Ebrahimi { "(?m)$", "eol{}" },
158*ccdc9c3eSSadaf Ebrahimi { "(?-m)^", "bot{}" },
159*ccdc9c3eSSadaf Ebrahimi { "(?-m)$", "eot{}" },
160*ccdc9c3eSSadaf Ebrahimi { "(?m)\\A", "bot{}" },
161*ccdc9c3eSSadaf Ebrahimi { "(?m)\\z", "eot{\\z}" },
162*ccdc9c3eSSadaf Ebrahimi { "(?-m)\\A", "bot{}" },
163*ccdc9c3eSSadaf Ebrahimi { "(?-m)\\z", "eot{\\z}" },
164*ccdc9c3eSSadaf Ebrahimi
165*ccdc9c3eSSadaf Ebrahimi // Test named captures
166*ccdc9c3eSSadaf Ebrahimi { "(?P<name>a)", "cap{name:lit{a}}" },
167*ccdc9c3eSSadaf Ebrahimi
168*ccdc9c3eSSadaf Ebrahimi // Case-folded literals
169*ccdc9c3eSSadaf Ebrahimi { "[Aa]", "litfold{a}" },
170*ccdc9c3eSSadaf Ebrahimi
171*ccdc9c3eSSadaf Ebrahimi // Strings
172*ccdc9c3eSSadaf Ebrahimi { "abcde", "str{abcde}" },
173*ccdc9c3eSSadaf Ebrahimi { "[Aa][Bb]cd", "cat{strfold{ab}str{cd}}" },
174*ccdc9c3eSSadaf Ebrahimi
175*ccdc9c3eSSadaf Ebrahimi // Reported bug involving \n leaking in despite use of NeverNL.
176*ccdc9c3eSSadaf Ebrahimi { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", TestZeroFlags },
177*ccdc9c3eSSadaf Ebrahimi { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", Regexp::FoldCase },
178*ccdc9c3eSSadaf Ebrahimi { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
179*ccdc9c3eSSadaf Ebrahimi { "[^ ]", "cc{0-0x9 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
180*ccdc9c3eSSadaf Ebrahimi { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", TestZeroFlags },
181*ccdc9c3eSSadaf Ebrahimi { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", Regexp::FoldCase },
182*ccdc9c3eSSadaf Ebrahimi { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
183*ccdc9c3eSSadaf Ebrahimi { "[^ \f]", "cc{0-0x9 0xb 0xd-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
184*ccdc9c3eSSadaf Ebrahimi { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", TestZeroFlags },
185*ccdc9c3eSSadaf Ebrahimi { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", Regexp::FoldCase },
186*ccdc9c3eSSadaf Ebrahimi { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
187*ccdc9c3eSSadaf Ebrahimi { "[^ \r]", "cc{0-0x9 0xb-0xc 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
188*ccdc9c3eSSadaf Ebrahimi { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", TestZeroFlags },
189*ccdc9c3eSSadaf Ebrahimi { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", Regexp::FoldCase },
190*ccdc9c3eSSadaf Ebrahimi { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
191*ccdc9c3eSSadaf Ebrahimi { "[^ \v]", "cc{0-0x9 0xc-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
192*ccdc9c3eSSadaf Ebrahimi { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", TestZeroFlags },
193*ccdc9c3eSSadaf Ebrahimi { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", Regexp::FoldCase },
194*ccdc9c3eSSadaf Ebrahimi { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
195*ccdc9c3eSSadaf Ebrahimi { "[^ \t]", "cc{0-0x8 0xb-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
196*ccdc9c3eSSadaf Ebrahimi { "[^ \r\f\v]", "cc{0-0x9 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
197*ccdc9c3eSSadaf Ebrahimi { "[^ \r\f\v]", "cc{0-0x9 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
198*ccdc9c3eSSadaf Ebrahimi { "[^ \r\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
199*ccdc9c3eSSadaf Ebrahimi { "[^ \r\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
200*ccdc9c3eSSadaf Ebrahimi { "[^ \r\n\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
201*ccdc9c3eSSadaf Ebrahimi { "[^ \r\n\f\t\v]", "cc{0-0x8 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
202*ccdc9c3eSSadaf Ebrahimi { "[^ \r\n\f\t]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL },
203*ccdc9c3eSSadaf Ebrahimi { "[^ \r\n\f\t]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}", Regexp::NeverNL | Regexp::FoldCase },
204*ccdc9c3eSSadaf Ebrahimi { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
205*ccdc9c3eSSadaf Ebrahimi Regexp::PerlClasses },
206*ccdc9c3eSSadaf Ebrahimi { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
207*ccdc9c3eSSadaf Ebrahimi Regexp::PerlClasses | Regexp::FoldCase },
208*ccdc9c3eSSadaf Ebrahimi { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
209*ccdc9c3eSSadaf Ebrahimi Regexp::PerlClasses | Regexp::NeverNL },
210*ccdc9c3eSSadaf Ebrahimi { "[^\t-\n\f-\r ]", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
211*ccdc9c3eSSadaf Ebrahimi Regexp::PerlClasses | Regexp::NeverNL | Regexp::FoldCase },
212*ccdc9c3eSSadaf Ebrahimi { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
213*ccdc9c3eSSadaf Ebrahimi Regexp::PerlClasses },
214*ccdc9c3eSSadaf Ebrahimi { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
215*ccdc9c3eSSadaf Ebrahimi Regexp::PerlClasses | Regexp::FoldCase },
216*ccdc9c3eSSadaf Ebrahimi { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
217*ccdc9c3eSSadaf Ebrahimi Regexp::PerlClasses | Regexp::NeverNL },
218*ccdc9c3eSSadaf Ebrahimi { "\\S", "cc{0-0x8 0xb 0xe-0x1f 0x21-0x10ffff}",
219*ccdc9c3eSSadaf Ebrahimi Regexp::PerlClasses | Regexp::NeverNL | Regexp::FoldCase },
220*ccdc9c3eSSadaf Ebrahimi };
221*ccdc9c3eSSadaf Ebrahimi
RegexpEqualTestingOnly(Regexp * a,Regexp * b)222*ccdc9c3eSSadaf Ebrahimi bool RegexpEqualTestingOnly(Regexp* a, Regexp* b) {
223*ccdc9c3eSSadaf Ebrahimi return Regexp::Equal(a, b);
224*ccdc9c3eSSadaf Ebrahimi }
225*ccdc9c3eSSadaf Ebrahimi
TestParse(const Test * tests,int ntests,Regexp::ParseFlags flags,const string & title)226*ccdc9c3eSSadaf Ebrahimi void TestParse(const Test* tests, int ntests, Regexp::ParseFlags flags,
227*ccdc9c3eSSadaf Ebrahimi const string& title) {
228*ccdc9c3eSSadaf Ebrahimi Regexp** re = new Regexp*[ntests];
229*ccdc9c3eSSadaf Ebrahimi for (int i = 0; i < ntests; i++) {
230*ccdc9c3eSSadaf Ebrahimi RegexpStatus status;
231*ccdc9c3eSSadaf Ebrahimi Regexp::ParseFlags f = flags;
232*ccdc9c3eSSadaf Ebrahimi if (tests[i].flags != 0) {
233*ccdc9c3eSSadaf Ebrahimi f = tests[i].flags & ~TestZeroFlags;
234*ccdc9c3eSSadaf Ebrahimi }
235*ccdc9c3eSSadaf Ebrahimi re[i] = Regexp::Parse(tests[i].regexp, f, &status);
236*ccdc9c3eSSadaf Ebrahimi ASSERT_TRUE(re[i] != NULL)
237*ccdc9c3eSSadaf Ebrahimi << " " << tests[i].regexp << " " << status.Text();
238*ccdc9c3eSSadaf Ebrahimi string s = re[i]->Dump();
239*ccdc9c3eSSadaf Ebrahimi EXPECT_EQ(string(tests[i].parse), s) << "Regexp: " << tests[i].regexp
240*ccdc9c3eSSadaf Ebrahimi << "\nparse: " << string(tests[i].parse) << " s: " << s << " flag=" << f;
241*ccdc9c3eSSadaf Ebrahimi }
242*ccdc9c3eSSadaf Ebrahimi
243*ccdc9c3eSSadaf Ebrahimi for (int i = 0; i < ntests; i++) {
244*ccdc9c3eSSadaf Ebrahimi for (int j = 0; j < ntests; j++) {
245*ccdc9c3eSSadaf Ebrahimi EXPECT_EQ(string(tests[i].parse) == string(tests[j].parse),
246*ccdc9c3eSSadaf Ebrahimi RegexpEqualTestingOnly(re[i], re[j]))
247*ccdc9c3eSSadaf Ebrahimi << "Regexp: " << tests[i].regexp << " " << tests[j].regexp;
248*ccdc9c3eSSadaf Ebrahimi }
249*ccdc9c3eSSadaf Ebrahimi }
250*ccdc9c3eSSadaf Ebrahimi
251*ccdc9c3eSSadaf Ebrahimi for (int i = 0; i < ntests; i++)
252*ccdc9c3eSSadaf Ebrahimi re[i]->Decref();
253*ccdc9c3eSSadaf Ebrahimi delete[] re;
254*ccdc9c3eSSadaf Ebrahimi }
255*ccdc9c3eSSadaf Ebrahimi
256*ccdc9c3eSSadaf Ebrahimi // Test that regexps parse to expected structures.
TEST(TestParse,SimpleRegexps)257*ccdc9c3eSSadaf Ebrahimi TEST(TestParse, SimpleRegexps) {
258*ccdc9c3eSSadaf Ebrahimi TestParse(tests, arraysize(tests), kTestFlags, "simple");
259*ccdc9c3eSSadaf Ebrahimi }
260*ccdc9c3eSSadaf Ebrahimi
261*ccdc9c3eSSadaf Ebrahimi Test foldcase_tests[] = {
262*ccdc9c3eSSadaf Ebrahimi { "AbCdE", "strfold{abcde}" },
263*ccdc9c3eSSadaf Ebrahimi { "[Aa]", "litfold{a}" },
264*ccdc9c3eSSadaf Ebrahimi { "a", "litfold{a}" },
265*ccdc9c3eSSadaf Ebrahimi
266*ccdc9c3eSSadaf Ebrahimi // 0x17F is an old English long s (looks like an f) and folds to s.
267*ccdc9c3eSSadaf Ebrahimi // 0x212A is the Kelvin symbol and folds to k.
268*ccdc9c3eSSadaf Ebrahimi { "A[F-g]", "cat{litfold{a}cc{0x41-0x7a 0x17f 0x212a}}" }, // [Aa][A-z...]
269*ccdc9c3eSSadaf Ebrahimi { "[[:upper:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" },
270*ccdc9c3eSSadaf Ebrahimi { "[[:lower:]]", "cc{0x41-0x5a 0x61-0x7a 0x17f 0x212a}" },
271*ccdc9c3eSSadaf Ebrahimi };
272*ccdc9c3eSSadaf Ebrahimi
273*ccdc9c3eSSadaf Ebrahimi // Test that parsing with FoldCase works.
TEST(TestParse,FoldCase)274*ccdc9c3eSSadaf Ebrahimi TEST(TestParse, FoldCase) {
275*ccdc9c3eSSadaf Ebrahimi TestParse(foldcase_tests, arraysize(foldcase_tests), Regexp::FoldCase, "foldcase");
276*ccdc9c3eSSadaf Ebrahimi }
277*ccdc9c3eSSadaf Ebrahimi
278*ccdc9c3eSSadaf Ebrahimi Test literal_tests[] = {
279*ccdc9c3eSSadaf Ebrahimi { "(|)^$.[*+?]{5,10},\\", "str{(|)^$.[*+?]{5,10},\\}" },
280*ccdc9c3eSSadaf Ebrahimi };
281*ccdc9c3eSSadaf Ebrahimi
282*ccdc9c3eSSadaf Ebrahimi // Test that parsing with Literal works.
TEST(TestParse,Literal)283*ccdc9c3eSSadaf Ebrahimi TEST(TestParse, Literal) {
284*ccdc9c3eSSadaf Ebrahimi TestParse(literal_tests, arraysize(literal_tests), Regexp::Literal, "literal");
285*ccdc9c3eSSadaf Ebrahimi }
286*ccdc9c3eSSadaf Ebrahimi
287*ccdc9c3eSSadaf Ebrahimi Test matchnl_tests[] = {
288*ccdc9c3eSSadaf Ebrahimi { ".", "dot{}" },
289*ccdc9c3eSSadaf Ebrahimi { "\n", "lit{\n}" },
290*ccdc9c3eSSadaf Ebrahimi { "[^a]", "cc{0-0x60 0x62-0x10ffff}" },
291*ccdc9c3eSSadaf Ebrahimi { "[a\\n]", "cc{0xa 0x61}" },
292*ccdc9c3eSSadaf Ebrahimi };
293*ccdc9c3eSSadaf Ebrahimi
294*ccdc9c3eSSadaf Ebrahimi // Test that parsing with MatchNL works.
295*ccdc9c3eSSadaf Ebrahimi // (Also tested above during simple cases.)
TEST(TestParse,MatchNL)296*ccdc9c3eSSadaf Ebrahimi TEST(TestParse, MatchNL) {
297*ccdc9c3eSSadaf Ebrahimi TestParse(matchnl_tests, arraysize(matchnl_tests), Regexp::MatchNL, "with MatchNL");
298*ccdc9c3eSSadaf Ebrahimi }
299*ccdc9c3eSSadaf Ebrahimi
300*ccdc9c3eSSadaf Ebrahimi Test nomatchnl_tests[] = {
301*ccdc9c3eSSadaf Ebrahimi { ".", "cc{0-0x9 0xb-0x10ffff}" },
302*ccdc9c3eSSadaf Ebrahimi { "\n", "lit{\n}" },
303*ccdc9c3eSSadaf Ebrahimi { "[^a]", "cc{0-0x9 0xb-0x60 0x62-0x10ffff}" },
304*ccdc9c3eSSadaf Ebrahimi { "[a\\n]", "cc{0xa 0x61}" },
305*ccdc9c3eSSadaf Ebrahimi };
306*ccdc9c3eSSadaf Ebrahimi
307*ccdc9c3eSSadaf Ebrahimi // Test that parsing without MatchNL works.
TEST(TestParse,NoMatchNL)308*ccdc9c3eSSadaf Ebrahimi TEST(TestParse, NoMatchNL) {
309*ccdc9c3eSSadaf Ebrahimi TestParse(nomatchnl_tests, arraysize(nomatchnl_tests), Regexp::NoParseFlags, "without MatchNL");
310*ccdc9c3eSSadaf Ebrahimi }
311*ccdc9c3eSSadaf Ebrahimi
312*ccdc9c3eSSadaf Ebrahimi Test prefix_tests[] = {
313*ccdc9c3eSSadaf Ebrahimi { "abc|abd", "cat{str{ab}cc{0x63-0x64}}" },
314*ccdc9c3eSSadaf Ebrahimi { "a(?:b)c|abd", "cat{str{ab}cc{0x63-0x64}}" },
315*ccdc9c3eSSadaf Ebrahimi { "abc|abd|aef|bcx|bcy",
316*ccdc9c3eSSadaf Ebrahimi "alt{cat{lit{a}alt{cat{lit{b}cc{0x63-0x64}}str{ef}}}"
317*ccdc9c3eSSadaf Ebrahimi "cat{str{bc}cc{0x78-0x79}}}" },
318*ccdc9c3eSSadaf Ebrahimi { "abc|x|abd", "alt{str{abc}lit{x}str{abd}}" },
319*ccdc9c3eSSadaf Ebrahimi { "(?i)abc|ABD", "cat{strfold{ab}cc{0x43-0x44 0x63-0x64}}" },
320*ccdc9c3eSSadaf Ebrahimi { "[ab]c|[ab]d", "cat{cc{0x61-0x62}cc{0x63-0x64}}" },
321*ccdc9c3eSSadaf Ebrahimi { ".c|.d", "cat{cc{0-0x9 0xb-0x10ffff}cc{0x63-0x64}}" },
322*ccdc9c3eSSadaf Ebrahimi { "\\Cc|\\Cd", "cat{byte{}cc{0x63-0x64}}" },
323*ccdc9c3eSSadaf Ebrahimi { "x{2}|x{2}[0-9]",
324*ccdc9c3eSSadaf Ebrahimi "cat{rep{2,2 lit{x}}alt{emp{}cc{0x30-0x39}}}" },
325*ccdc9c3eSSadaf Ebrahimi { "x{2}y|x{2}[0-9]y",
326*ccdc9c3eSSadaf Ebrahimi "cat{rep{2,2 lit{x}}alt{lit{y}cat{cc{0x30-0x39}lit{y}}}}" },
327*ccdc9c3eSSadaf Ebrahimi { "n|r|rs",
328*ccdc9c3eSSadaf Ebrahimi "alt{lit{n}cat{lit{r}alt{emp{}lit{s}}}}" },
329*ccdc9c3eSSadaf Ebrahimi { "n|rs|r",
330*ccdc9c3eSSadaf Ebrahimi "alt{lit{n}cat{lit{r}alt{lit{s}emp{}}}}" },
331*ccdc9c3eSSadaf Ebrahimi { "r|rs|n",
332*ccdc9c3eSSadaf Ebrahimi "alt{cat{lit{r}alt{emp{}lit{s}}}lit{n}}" },
333*ccdc9c3eSSadaf Ebrahimi { "rs|r|n",
334*ccdc9c3eSSadaf Ebrahimi "alt{cat{lit{r}alt{lit{s}emp{}}}lit{n}}" },
335*ccdc9c3eSSadaf Ebrahimi { "a\\C*?c|a\\C*?b",
336*ccdc9c3eSSadaf Ebrahimi "cat{lit{a}alt{cat{nstar{byte{}}lit{c}}cat{nstar{byte{}}lit{b}}}}" },
337*ccdc9c3eSSadaf Ebrahimi { "^/a/bc|^/a/de",
338*ccdc9c3eSSadaf Ebrahimi "cat{bol{}cat{str{/a/}alt{str{bc}str{de}}}}" },
339*ccdc9c3eSSadaf Ebrahimi // In the past, factoring was limited to kFactorAlternationMaxDepth (8).
340*ccdc9c3eSSadaf Ebrahimi { "a|aa|aaa|aaaa|aaaaa|aaaaaa|aaaaaaa|aaaaaaaa|aaaaaaaaa|aaaaaaaaaa",
341*ccdc9c3eSSadaf Ebrahimi "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}"
342*ccdc9c3eSSadaf Ebrahimi "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}"
343*ccdc9c3eSSadaf Ebrahimi "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}" "cat{lit{a}alt{emp{}"
344*ccdc9c3eSSadaf Ebrahimi "lit{a}}}}}}}}}}}}}}}}}}}" },
345*ccdc9c3eSSadaf Ebrahimi { "a|aardvark|aardvarks|abaci|aback|abacus|abacuses|abaft|abalone|abalones",
346*ccdc9c3eSSadaf Ebrahimi "cat{lit{a}alt{emp{}cat{str{ardvark}alt{emp{}lit{s}}}"
347*ccdc9c3eSSadaf Ebrahimi "cat{str{ba}alt{cat{lit{c}alt{cc{0x69 0x6b}cat{str{us}alt{emp{}str{es}}}}}"
348*ccdc9c3eSSadaf Ebrahimi "str{ft}cat{str{lone}alt{emp{}lit{s}}}}}}}" },
349*ccdc9c3eSSadaf Ebrahimi };
350*ccdc9c3eSSadaf Ebrahimi
351*ccdc9c3eSSadaf Ebrahimi // Test that prefix factoring works.
TEST(TestParse,Prefix)352*ccdc9c3eSSadaf Ebrahimi TEST(TestParse, Prefix) {
353*ccdc9c3eSSadaf Ebrahimi TestParse(prefix_tests, arraysize(prefix_tests), Regexp::PerlX, "prefix");
354*ccdc9c3eSSadaf Ebrahimi }
355*ccdc9c3eSSadaf Ebrahimi
356*ccdc9c3eSSadaf Ebrahimi Test nested_tests[] = {
357*ccdc9c3eSSadaf Ebrahimi { "((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}))",
358*ccdc9c3eSSadaf Ebrahimi "cap{cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 lit{x}}}}}}}}}}}}}}}}}}}}" },
359*ccdc9c3eSSadaf Ebrahimi { "((((((((((x{1}){2}){2}){2}){2}){2}){2}){2}){2}){2})",
360*ccdc9c3eSSadaf Ebrahimi "cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{1,1 lit{x}}}}}}}}}}}}}}}}}}}}}" },
361*ccdc9c3eSSadaf Ebrahimi { "((((((((((x{0}){2}){2}){2}){2}){2}){2}){2}){2}){2})",
362*ccdc9c3eSSadaf Ebrahimi "cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 cap{rep{0,0 lit{x}}}}}}}}}}}}}}}}}}}}}" },
363*ccdc9c3eSSadaf Ebrahimi { "((((((x{2}){2}){2}){5}){5}){5})",
364*ccdc9c3eSSadaf Ebrahimi "cap{rep{5,5 cap{rep{5,5 cap{rep{5,5 cap{rep{2,2 cap{rep{2,2 cap{rep{2,2 lit{x}}}}}}}}}}}}}" },
365*ccdc9c3eSSadaf Ebrahimi };
366*ccdc9c3eSSadaf Ebrahimi
367*ccdc9c3eSSadaf Ebrahimi // Test that nested repetition works.
TEST(TestParse,Nested)368*ccdc9c3eSSadaf Ebrahimi TEST(TestParse, Nested) {
369*ccdc9c3eSSadaf Ebrahimi TestParse(nested_tests, arraysize(nested_tests), Regexp::PerlX, "nested");
370*ccdc9c3eSSadaf Ebrahimi }
371*ccdc9c3eSSadaf Ebrahimi
// Invalid regular expressions: TEST(TestParse, InvalidRegexps) requires each
// of these to be rejected both with Regexp::PerlX and with
// Regexp::NoParseFlags.  Read-only and file-local, hence static const.
static const char* const badtests[] = {
  "(",
  ")",
  "(a",
  "(a|b|",
  "(a|b",
  "[a-z",
  "([a-z)",
  "x{1001}",
  "\xff",      // Invalid UTF-8
  "[\xff]",
  "[\\\xff]",
  "\\\xff",
  "(?P<name>a",
  "(?P<name>",
  "(?P<name",
  "(?P<x y>a)",
  "(?P<>a)",
  "[a-Z]",
  "(?i)[a-Z]",
  "a{100000}",
  "a{100000,}",
  "((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})",
  "(((x{7}){11}){13})",
  "\\Q\\E*",
};
399*ccdc9c3eSSadaf Ebrahimi
// Valid in Perl, bad in POSIX: TEST(TestParse, InvalidRegexps) requires each
// of these to parse with Regexp::PerlX and to be rejected with
// Regexp::NoParseFlags.  Read-only and file-local, hence static const.
static const char* const only_perl[] = {
  "[a-b-c]",
  "\\Qabc\\E",
  "\\Q*+?{[\\E",
  "\\Q\\\\E",
  "\\Q\\\\\\E",
  "\\Q\\\\\\\\E",
  "\\Q\\\\\\\\\\E",
  "(?:a)",
  "(?P<name>a)",
};
412*ccdc9c3eSSadaf Ebrahimi
// Valid in POSIX, bad in Perl: TEST(TestParse, InvalidRegexps) requires each
// of these to parse with Regexp::NoParseFlags and to be rejected with
// Regexp::PerlX.  Read-only and file-local, hence static const.
static const char* const only_posix[] = {
  "a++",
  "a**",
  "a?*",
  "a+*",
  "a{1}*",
};
421*ccdc9c3eSSadaf Ebrahimi
422*ccdc9c3eSSadaf Ebrahimi // Test that parser rejects bad regexps.
TEST(TestParse,InvalidRegexps)423*ccdc9c3eSSadaf Ebrahimi TEST(TestParse, InvalidRegexps) {
424*ccdc9c3eSSadaf Ebrahimi for (int i = 0; i < arraysize(badtests); i++) {
425*ccdc9c3eSSadaf Ebrahimi ASSERT_TRUE(Regexp::Parse(badtests[i], Regexp::PerlX, NULL) == NULL)
426*ccdc9c3eSSadaf Ebrahimi << " " << badtests[i];
427*ccdc9c3eSSadaf Ebrahimi ASSERT_TRUE(Regexp::Parse(badtests[i], Regexp::NoParseFlags, NULL) == NULL)
428*ccdc9c3eSSadaf Ebrahimi << " " << badtests[i];
429*ccdc9c3eSSadaf Ebrahimi }
430*ccdc9c3eSSadaf Ebrahimi for (int i = 0; i < arraysize(only_posix); i++) {
431*ccdc9c3eSSadaf Ebrahimi ASSERT_TRUE(Regexp::Parse(only_posix[i], Regexp::PerlX, NULL) == NULL)
432*ccdc9c3eSSadaf Ebrahimi << " " << only_posix[i];
433*ccdc9c3eSSadaf Ebrahimi Regexp* re = Regexp::Parse(only_posix[i], Regexp::NoParseFlags, NULL);
434*ccdc9c3eSSadaf Ebrahimi ASSERT_TRUE(re != NULL) << " " << only_posix[i];
435*ccdc9c3eSSadaf Ebrahimi re->Decref();
436*ccdc9c3eSSadaf Ebrahimi }
437*ccdc9c3eSSadaf Ebrahimi for (int i = 0; i < arraysize(only_perl); i++) {
438*ccdc9c3eSSadaf Ebrahimi ASSERT_TRUE(Regexp::Parse(only_perl[i], Regexp::NoParseFlags, NULL) == NULL)
439*ccdc9c3eSSadaf Ebrahimi << " " << only_perl[i];
440*ccdc9c3eSSadaf Ebrahimi Regexp* re = Regexp::Parse(only_perl[i], Regexp::PerlX, NULL);
441*ccdc9c3eSSadaf Ebrahimi ASSERT_TRUE(re != NULL) << " " << only_perl[i];
442*ccdc9c3eSSadaf Ebrahimi re->Decref();
443*ccdc9c3eSSadaf Ebrahimi }
444*ccdc9c3eSSadaf Ebrahimi }
445*ccdc9c3eSSadaf Ebrahimi
446*ccdc9c3eSSadaf Ebrahimi // Test that ToString produces original regexp or equivalent one.
TEST(TestToString,EquivalentParse)447*ccdc9c3eSSadaf Ebrahimi TEST(TestToString, EquivalentParse) {
448*ccdc9c3eSSadaf Ebrahimi for (int i = 0; i < arraysize(tests); i++) {
449*ccdc9c3eSSadaf Ebrahimi RegexpStatus status;
450*ccdc9c3eSSadaf Ebrahimi Regexp::ParseFlags f = kTestFlags;
451*ccdc9c3eSSadaf Ebrahimi if (tests[i].flags != 0) {
452*ccdc9c3eSSadaf Ebrahimi f = tests[i].flags & ~TestZeroFlags;
453*ccdc9c3eSSadaf Ebrahimi }
454*ccdc9c3eSSadaf Ebrahimi Regexp* re = Regexp::Parse(tests[i].regexp, f, &status);
455*ccdc9c3eSSadaf Ebrahimi ASSERT_TRUE(re != NULL) << " " << tests[i].regexp << " " << status.Text();
456*ccdc9c3eSSadaf Ebrahimi string s = re->Dump();
457*ccdc9c3eSSadaf Ebrahimi EXPECT_EQ(string(tests[i].parse), s) << " " << tests[i].regexp << " " << string(tests[i].parse) << " " << s;
458*ccdc9c3eSSadaf Ebrahimi string t = re->ToString();
459*ccdc9c3eSSadaf Ebrahimi if (t != tests[i].regexp) {
460*ccdc9c3eSSadaf Ebrahimi // If ToString didn't return the original regexp,
461*ccdc9c3eSSadaf Ebrahimi // it must have found one with fewer parens.
462*ccdc9c3eSSadaf Ebrahimi // Unfortunately we can't check the length here, because
463*ccdc9c3eSSadaf Ebrahimi // ToString produces "\\{" for a literal brace,
464*ccdc9c3eSSadaf Ebrahimi // but "{" is a shorter equivalent.
465*ccdc9c3eSSadaf Ebrahimi // ASSERT_LT(t.size(), strlen(tests[i].regexp))
466*ccdc9c3eSSadaf Ebrahimi // << " t=" << t << " regexp=" << tests[i].regexp;
467*ccdc9c3eSSadaf Ebrahimi
468*ccdc9c3eSSadaf Ebrahimi // Test that if we parse the new regexp we get the same structure.
469*ccdc9c3eSSadaf Ebrahimi Regexp* nre = Regexp::Parse(t, Regexp::MatchNL | Regexp::PerlX, &status);
470*ccdc9c3eSSadaf Ebrahimi ASSERT_TRUE(nre != NULL) << " reparse " << t << " " << status.Text();
471*ccdc9c3eSSadaf Ebrahimi string ss = nre->Dump();
472*ccdc9c3eSSadaf Ebrahimi string tt = nre->ToString();
473*ccdc9c3eSSadaf Ebrahimi if (s != ss || t != tt)
474*ccdc9c3eSSadaf Ebrahimi LOG(INFO) << "ToString(" << tests[i].regexp << ") = " << t;
475*ccdc9c3eSSadaf Ebrahimi EXPECT_EQ(s, ss);
476*ccdc9c3eSSadaf Ebrahimi EXPECT_EQ(t, tt);
477*ccdc9c3eSSadaf Ebrahimi nre->Decref();
478*ccdc9c3eSSadaf Ebrahimi }
479*ccdc9c3eSSadaf Ebrahimi re->Decref();
480*ccdc9c3eSSadaf Ebrahimi }
481*ccdc9c3eSSadaf Ebrahimi }
482*ccdc9c3eSSadaf Ebrahimi
483*ccdc9c3eSSadaf Ebrahimi // Test that capture error args are correct.
TEST(NamedCaptures,ErrorArgs)484*ccdc9c3eSSadaf Ebrahimi TEST(NamedCaptures, ErrorArgs) {
485*ccdc9c3eSSadaf Ebrahimi RegexpStatus status;
486*ccdc9c3eSSadaf Ebrahimi Regexp* re;
487*ccdc9c3eSSadaf Ebrahimi
488*ccdc9c3eSSadaf Ebrahimi re = Regexp::Parse("test(?P<name", Regexp::LikePerl, &status);
489*ccdc9c3eSSadaf Ebrahimi EXPECT_TRUE(re == NULL);
490*ccdc9c3eSSadaf Ebrahimi EXPECT_EQ(status.code(), kRegexpBadNamedCapture);
491*ccdc9c3eSSadaf Ebrahimi EXPECT_EQ(status.error_arg(), "(?P<name");
492*ccdc9c3eSSadaf Ebrahimi
493*ccdc9c3eSSadaf Ebrahimi re = Regexp::Parse("test(?P<space bar>z)", Regexp::LikePerl, &status);
494*ccdc9c3eSSadaf Ebrahimi EXPECT_TRUE(re == NULL);
495*ccdc9c3eSSadaf Ebrahimi EXPECT_EQ(status.code(), kRegexpBadNamedCapture);
496*ccdc9c3eSSadaf Ebrahimi EXPECT_EQ(status.error_arg(), "(?P<space bar>");
497*ccdc9c3eSSadaf Ebrahimi }
498*ccdc9c3eSSadaf Ebrahimi
499*ccdc9c3eSSadaf Ebrahimi } // namespace re2
500