1*ccdc9c3eSSadaf Ebrahimi // Copyright 2008 The RE2 Authors. All Rights Reserved.
2*ccdc9c3eSSadaf Ebrahimi // Use of this source code is governed by a BSD-style
3*ccdc9c3eSSadaf Ebrahimi // license that can be found in the LICENSE file.
4*ccdc9c3eSSadaf Ebrahimi
5*ccdc9c3eSSadaf Ebrahimi // Regular expression engine tester -- test all the implementations against each other.
6*ccdc9c3eSSadaf Ebrahimi
#include <stddef.h>
#include <stdint.h>
#include <string.h>
#include <string>
#include <vector>

#include "util/util.h"
#include "util/flags.h"
#include "util/logging.h"
#include "util/strutil.h"
#include "re2/testing/tester.h"
#include "re2/prog.h"
#include "re2/re2.h"
#include "re2/regexp.h"
20*ccdc9c3eSSadaf Ebrahimi
// Command-line flags controlling the tester's diagnostics and scope.

// When set, the TestInstance constructor logs the compiled forward program.
DEFINE_bool(dump_prog, false, "dump regexp program");
// When set, RunCase also logs engines whose results were okay or skipped.
DEFINE_bool(log_okay, false, "log successful runs");
// When set, the TestInstance constructor logs the reverse program
// (only built when a DFA engine is enabled).
DEFINE_bool(dump_rprog, false, "dump reversed regexp program");

// Failure budget: RunCase decrements this after each failing case while it
// is positive and logs QFATAL when it reaches zero.
DEFINE_int32(max_regexp_failures, 100,
             "maximum number of regexp test failures (-1 = unlimited)");

// An engine is enabled when its name occurs as a substring of this value;
// an empty value enables every engine (see Engines()).
DEFINE_string(regexp_engines, "", "pattern to select regexp engines to test");
29*ccdc9c3eSSadaf Ebrahimi
30*ccdc9c3eSSadaf Ebrahimi namespace re2 {
31*ccdc9c3eSSadaf Ebrahimi
// Capacity of Result::submatch. RunSearch clamps the number of submatches
// it asks an engine for to this value.
enum {
  kMaxSubmatch = 1+16,  // $0...$16
};
35*ccdc9c3eSSadaf Ebrahimi
// Printable engine names, indexed by Engine value (see EngineName()).
// NOTE(review): the order here presumably has to mirror the Engine enum
// declared in tester.h -- confirm when adding or reordering engines.
const char* engine_names[kEngineMax] = {
  "Backtrack",
  "NFA",
  "DFA",
  "DFA1",
  "OnePass",
  "BitState",
  "RE2",
  "RE2a",
  "RE2b",
  "PCRE",
};
48*ccdc9c3eSSadaf Ebrahimi
49*ccdc9c3eSSadaf Ebrahimi // Returns the name of the engine.
EngineName(Engine e)50*ccdc9c3eSSadaf Ebrahimi static const char* EngineName(Engine e) {
51*ccdc9c3eSSadaf Ebrahimi CHECK_GE(e, 0);
52*ccdc9c3eSSadaf Ebrahimi CHECK_LT(e, arraysize(engine_names));
53*ccdc9c3eSSadaf Ebrahimi CHECK(engine_names[e] != NULL);
54*ccdc9c3eSSadaf Ebrahimi return engine_names[e];
55*ccdc9c3eSSadaf Ebrahimi }
56*ccdc9c3eSSadaf Ebrahimi
57*ccdc9c3eSSadaf Ebrahimi // Returns bit mask of engines to use.
Engines()58*ccdc9c3eSSadaf Ebrahimi static uint32_t Engines() {
59*ccdc9c3eSSadaf Ebrahimi static bool did_parse = false;
60*ccdc9c3eSSadaf Ebrahimi static uint32_t cached_engines = 0;
61*ccdc9c3eSSadaf Ebrahimi
62*ccdc9c3eSSadaf Ebrahimi if (did_parse)
63*ccdc9c3eSSadaf Ebrahimi return cached_engines;
64*ccdc9c3eSSadaf Ebrahimi
65*ccdc9c3eSSadaf Ebrahimi if (FLAGS_regexp_engines.empty()) {
66*ccdc9c3eSSadaf Ebrahimi cached_engines = ~0;
67*ccdc9c3eSSadaf Ebrahimi } else {
68*ccdc9c3eSSadaf Ebrahimi for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++)
69*ccdc9c3eSSadaf Ebrahimi if (FLAGS_regexp_engines.find(EngineName(i)) != string::npos)
70*ccdc9c3eSSadaf Ebrahimi cached_engines |= 1<<i;
71*ccdc9c3eSSadaf Ebrahimi }
72*ccdc9c3eSSadaf Ebrahimi
73*ccdc9c3eSSadaf Ebrahimi if (cached_engines == 0)
74*ccdc9c3eSSadaf Ebrahimi LOG(INFO) << "Warning: no engines enabled.";
75*ccdc9c3eSSadaf Ebrahimi if (!UsingPCRE)
76*ccdc9c3eSSadaf Ebrahimi cached_engines &= ~(1<<kEnginePCRE);
77*ccdc9c3eSSadaf Ebrahimi for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++) {
78*ccdc9c3eSSadaf Ebrahimi if (cached_engines & (1<<i))
79*ccdc9c3eSSadaf Ebrahimi LOG(INFO) << EngineName(i) << " enabled";
80*ccdc9c3eSSadaf Ebrahimi }
81*ccdc9c3eSSadaf Ebrahimi
82*ccdc9c3eSSadaf Ebrahimi did_parse = true;
83*ccdc9c3eSSadaf Ebrahimi return cached_engines;
84*ccdc9c3eSSadaf Ebrahimi }
85*ccdc9c3eSSadaf Ebrahimi
// The result of running a match.
// RunSearch zero-fills the whole struct before filling it in, and clears
// submatch again whenever the search did not match.
struct TestInstance::Result {
  bool skipped;         // test skipped: wasn't applicable
  bool matched;         // found a match
  bool untrusted;       // don't really trust the answer
  bool have_submatch;   // computed all submatch info
  bool have_submatch0;  // computed just submatch[0]
  StringPiece submatch[kMaxSubmatch];  // capture ranges; [0] is $0
};

// Shorthand used by the free functions in this file.
typedef TestInstance::Result Result;
97*ccdc9c3eSSadaf Ebrahimi
98*ccdc9c3eSSadaf Ebrahimi // Formats a single capture range s in text in the form (a,b)
99*ccdc9c3eSSadaf Ebrahimi // where a and b are the starting and ending offsets of s in text.
FormatCapture(const StringPiece & text,const StringPiece & s)100*ccdc9c3eSSadaf Ebrahimi static string FormatCapture(const StringPiece& text, const StringPiece& s) {
101*ccdc9c3eSSadaf Ebrahimi if (s.begin() == NULL)
102*ccdc9c3eSSadaf Ebrahimi return "(?,?)";
103*ccdc9c3eSSadaf Ebrahimi return StringPrintf("(%td,%td)",
104*ccdc9c3eSSadaf Ebrahimi s.begin() - text.begin(), s.end() - text.begin());
105*ccdc9c3eSSadaf Ebrahimi }
106*ccdc9c3eSSadaf Ebrahimi
107*ccdc9c3eSSadaf Ebrahimi // Returns whether text contains non-ASCII (>= 0x80) bytes.
NonASCII(const StringPiece & text)108*ccdc9c3eSSadaf Ebrahimi static bool NonASCII(const StringPiece& text) {
109*ccdc9c3eSSadaf Ebrahimi for (size_t i = 0; i < text.size(); i++)
110*ccdc9c3eSSadaf Ebrahimi if ((uint8_t)text[i] >= 0x80)
111*ccdc9c3eSSadaf Ebrahimi return true;
112*ccdc9c3eSSadaf Ebrahimi return false;
113*ccdc9c3eSSadaf Ebrahimi }
114*ccdc9c3eSSadaf Ebrahimi
115*ccdc9c3eSSadaf Ebrahimi // Returns string representation of match kind.
FormatKind(Prog::MatchKind kind)116*ccdc9c3eSSadaf Ebrahimi static string FormatKind(Prog::MatchKind kind) {
117*ccdc9c3eSSadaf Ebrahimi switch (kind) {
118*ccdc9c3eSSadaf Ebrahimi case Prog::kFullMatch:
119*ccdc9c3eSSadaf Ebrahimi return "full match";
120*ccdc9c3eSSadaf Ebrahimi case Prog::kLongestMatch:
121*ccdc9c3eSSadaf Ebrahimi return "longest match";
122*ccdc9c3eSSadaf Ebrahimi case Prog::kFirstMatch:
123*ccdc9c3eSSadaf Ebrahimi return "first match";
124*ccdc9c3eSSadaf Ebrahimi case Prog::kManyMatch:
125*ccdc9c3eSSadaf Ebrahimi return "many match";
126*ccdc9c3eSSadaf Ebrahimi }
127*ccdc9c3eSSadaf Ebrahimi return "???";
128*ccdc9c3eSSadaf Ebrahimi }
129*ccdc9c3eSSadaf Ebrahimi
130*ccdc9c3eSSadaf Ebrahimi // Returns string representation of anchor kind.
FormatAnchor(Prog::Anchor anchor)131*ccdc9c3eSSadaf Ebrahimi static string FormatAnchor(Prog::Anchor anchor) {
132*ccdc9c3eSSadaf Ebrahimi switch (anchor) {
133*ccdc9c3eSSadaf Ebrahimi case Prog::kAnchored:
134*ccdc9c3eSSadaf Ebrahimi return "anchored";
135*ccdc9c3eSSadaf Ebrahimi case Prog::kUnanchored:
136*ccdc9c3eSSadaf Ebrahimi return "unanchored";
137*ccdc9c3eSSadaf Ebrahimi }
138*ccdc9c3eSSadaf Ebrahimi return "???";
139*ccdc9c3eSSadaf Ebrahimi }
140*ccdc9c3eSSadaf Ebrahimi
// Pairs a set of parse flags with the label FormatMode prints for it.
struct ParseMode {
  Regexp::ParseFlags parse_flags;
  string desc;
};

// Regexp::LikePerl as-is.
static const Regexp::ParseFlags single_line =
  Regexp::LikePerl;
// Regexp::LikePerl with the Regexp::OneLine bit cleared.
static const Regexp::ParseFlags multi_line =
  static_cast<Regexp::ParseFlags>(Regexp::LikePerl & ~Regexp::OneLine);

// The parse modes exercised by Tester: it builds one TestInstance for each
// (match kind, parse mode) pair.
static ParseMode parse_modes[] = {
  { single_line,                   "single-line"          },
  { single_line|Regexp::Latin1,    "single-line, latin1"  },
  { multi_line,                    "multiline"            },
  { multi_line|Regexp::NonGreedy,  "multiline, nongreedy" },
  { multi_line|Regexp::Latin1,     "multiline, latin1"    },
};
158*ccdc9c3eSSadaf Ebrahimi
FormatMode(Regexp::ParseFlags flags)159*ccdc9c3eSSadaf Ebrahimi static string FormatMode(Regexp::ParseFlags flags) {
160*ccdc9c3eSSadaf Ebrahimi for (int i = 0; i < arraysize(parse_modes); i++)
161*ccdc9c3eSSadaf Ebrahimi if (parse_modes[i].parse_flags == flags)
162*ccdc9c3eSSadaf Ebrahimi return parse_modes[i].desc;
163*ccdc9c3eSSadaf Ebrahimi return StringPrintf("%#x", static_cast<uint32_t>(flags));
164*ccdc9c3eSSadaf Ebrahimi }
165*ccdc9c3eSSadaf Ebrahimi
166*ccdc9c3eSSadaf Ebrahimi // Constructs and saves all the matching engines that
167*ccdc9c3eSSadaf Ebrahimi // will be required for the given tests.
TestInstance(const StringPiece & regexp_str,Prog::MatchKind kind,Regexp::ParseFlags flags)168*ccdc9c3eSSadaf Ebrahimi TestInstance::TestInstance(const StringPiece& regexp_str, Prog::MatchKind kind,
169*ccdc9c3eSSadaf Ebrahimi Regexp::ParseFlags flags)
170*ccdc9c3eSSadaf Ebrahimi : regexp_str_(regexp_str),
171*ccdc9c3eSSadaf Ebrahimi kind_(kind),
172*ccdc9c3eSSadaf Ebrahimi flags_(flags),
173*ccdc9c3eSSadaf Ebrahimi error_(false),
174*ccdc9c3eSSadaf Ebrahimi regexp_(NULL),
175*ccdc9c3eSSadaf Ebrahimi num_captures_(0),
176*ccdc9c3eSSadaf Ebrahimi prog_(NULL),
177*ccdc9c3eSSadaf Ebrahimi rprog_(NULL),
178*ccdc9c3eSSadaf Ebrahimi re_(NULL),
179*ccdc9c3eSSadaf Ebrahimi re2_(NULL) {
180*ccdc9c3eSSadaf Ebrahimi
181*ccdc9c3eSSadaf Ebrahimi VLOG(1) << CEscape(regexp_str);
182*ccdc9c3eSSadaf Ebrahimi
183*ccdc9c3eSSadaf Ebrahimi // Compile regexp to prog.
184*ccdc9c3eSSadaf Ebrahimi // Always required - needed for backtracking (reference implementation).
185*ccdc9c3eSSadaf Ebrahimi RegexpStatus status;
186*ccdc9c3eSSadaf Ebrahimi regexp_ = Regexp::Parse(regexp_str, flags, &status);
187*ccdc9c3eSSadaf Ebrahimi if (regexp_ == NULL) {
188*ccdc9c3eSSadaf Ebrahimi LOG(INFO) << "Cannot parse: " << CEscape(regexp_str_)
189*ccdc9c3eSSadaf Ebrahimi << " mode: " << FormatMode(flags);
190*ccdc9c3eSSadaf Ebrahimi error_ = true;
191*ccdc9c3eSSadaf Ebrahimi return;
192*ccdc9c3eSSadaf Ebrahimi }
193*ccdc9c3eSSadaf Ebrahimi num_captures_ = regexp_->NumCaptures();
194*ccdc9c3eSSadaf Ebrahimi prog_ = regexp_->CompileToProg(0);
195*ccdc9c3eSSadaf Ebrahimi if (prog_ == NULL) {
196*ccdc9c3eSSadaf Ebrahimi LOG(INFO) << "Cannot compile: " << CEscape(regexp_str_);
197*ccdc9c3eSSadaf Ebrahimi error_ = true;
198*ccdc9c3eSSadaf Ebrahimi return;
199*ccdc9c3eSSadaf Ebrahimi }
200*ccdc9c3eSSadaf Ebrahimi if (FLAGS_dump_prog) {
201*ccdc9c3eSSadaf Ebrahimi LOG(INFO) << "Prog for "
202*ccdc9c3eSSadaf Ebrahimi << " regexp "
203*ccdc9c3eSSadaf Ebrahimi << CEscape(regexp_str_)
204*ccdc9c3eSSadaf Ebrahimi << " (" << FormatKind(kind_)
205*ccdc9c3eSSadaf Ebrahimi << ", " << FormatMode(flags_)
206*ccdc9c3eSSadaf Ebrahimi << ")\n"
207*ccdc9c3eSSadaf Ebrahimi << prog_->Dump();
208*ccdc9c3eSSadaf Ebrahimi }
209*ccdc9c3eSSadaf Ebrahimi
210*ccdc9c3eSSadaf Ebrahimi // Compile regexp to reversed prog. Only needed for DFA engines.
211*ccdc9c3eSSadaf Ebrahimi if (Engines() & ((1<<kEngineDFA)|(1<<kEngineDFA1))) {
212*ccdc9c3eSSadaf Ebrahimi rprog_ = regexp_->CompileToReverseProg(0);
213*ccdc9c3eSSadaf Ebrahimi if (rprog_ == NULL) {
214*ccdc9c3eSSadaf Ebrahimi LOG(INFO) << "Cannot reverse compile: " << CEscape(regexp_str_);
215*ccdc9c3eSSadaf Ebrahimi error_ = true;
216*ccdc9c3eSSadaf Ebrahimi return;
217*ccdc9c3eSSadaf Ebrahimi }
218*ccdc9c3eSSadaf Ebrahimi if (FLAGS_dump_rprog)
219*ccdc9c3eSSadaf Ebrahimi LOG(INFO) << rprog_->Dump();
220*ccdc9c3eSSadaf Ebrahimi }
221*ccdc9c3eSSadaf Ebrahimi
222*ccdc9c3eSSadaf Ebrahimi // Create re string that will be used for RE and RE2.
223*ccdc9c3eSSadaf Ebrahimi string re = string(regexp_str);
224*ccdc9c3eSSadaf Ebrahimi // Accomodate flags.
225*ccdc9c3eSSadaf Ebrahimi // Regexp::Latin1 will be accomodated below.
226*ccdc9c3eSSadaf Ebrahimi if (!(flags & Regexp::OneLine))
227*ccdc9c3eSSadaf Ebrahimi re = "(?m)" + re;
228*ccdc9c3eSSadaf Ebrahimi if (flags & Regexp::NonGreedy)
229*ccdc9c3eSSadaf Ebrahimi re = "(?U)" + re;
230*ccdc9c3eSSadaf Ebrahimi if (flags & Regexp::DotNL)
231*ccdc9c3eSSadaf Ebrahimi re = "(?s)" + re;
232*ccdc9c3eSSadaf Ebrahimi
233*ccdc9c3eSSadaf Ebrahimi // Compile regexp to RE2.
234*ccdc9c3eSSadaf Ebrahimi if (Engines() & ((1<<kEngineRE2)|(1<<kEngineRE2a)|(1<<kEngineRE2b))) {
235*ccdc9c3eSSadaf Ebrahimi RE2::Options options;
236*ccdc9c3eSSadaf Ebrahimi if (flags & Regexp::Latin1)
237*ccdc9c3eSSadaf Ebrahimi options.set_encoding(RE2::Options::EncodingLatin1);
238*ccdc9c3eSSadaf Ebrahimi if (kind_ == Prog::kLongestMatch)
239*ccdc9c3eSSadaf Ebrahimi options.set_longest_match(true);
240*ccdc9c3eSSadaf Ebrahimi re2_ = new RE2(re, options);
241*ccdc9c3eSSadaf Ebrahimi if (!re2_->error().empty()) {
242*ccdc9c3eSSadaf Ebrahimi LOG(INFO) << "Cannot RE2: " << CEscape(re);
243*ccdc9c3eSSadaf Ebrahimi error_ = true;
244*ccdc9c3eSSadaf Ebrahimi return;
245*ccdc9c3eSSadaf Ebrahimi }
246*ccdc9c3eSSadaf Ebrahimi }
247*ccdc9c3eSSadaf Ebrahimi
248*ccdc9c3eSSadaf Ebrahimi // Compile regexp to RE.
249*ccdc9c3eSSadaf Ebrahimi // PCRE as exposed by the RE interface isn't always usable.
250*ccdc9c3eSSadaf Ebrahimi // 1. It disagrees about handling of empty-string reptitions
251*ccdc9c3eSSadaf Ebrahimi // like matching (a*)* against "b". PCRE treats the (a*) as
252*ccdc9c3eSSadaf Ebrahimi // occurring once, while we treat it as occurring not at all.
253*ccdc9c3eSSadaf Ebrahimi // 2. It treats $ as this weird thing meaning end of string
254*ccdc9c3eSSadaf Ebrahimi // or before the \n at the end of the string.
255*ccdc9c3eSSadaf Ebrahimi // 3. It doesn't implement POSIX leftmost-longest matching.
256*ccdc9c3eSSadaf Ebrahimi // 4. It lets \s match vertical tab.
257*ccdc9c3eSSadaf Ebrahimi // MimicsPCRE() detects 1 and 2.
258*ccdc9c3eSSadaf Ebrahimi if ((Engines() & (1<<kEnginePCRE)) && regexp_->MimicsPCRE() &&
259*ccdc9c3eSSadaf Ebrahimi kind_ != Prog::kLongestMatch) {
260*ccdc9c3eSSadaf Ebrahimi PCRE_Options o;
261*ccdc9c3eSSadaf Ebrahimi o.set_option(PCRE::UTF8);
262*ccdc9c3eSSadaf Ebrahimi if (flags & Regexp::Latin1)
263*ccdc9c3eSSadaf Ebrahimi o.set_option(PCRE::None);
264*ccdc9c3eSSadaf Ebrahimi // PCRE has interface bug keeping us from finding $0, so
265*ccdc9c3eSSadaf Ebrahimi // add one more layer of parens.
266*ccdc9c3eSSadaf Ebrahimi re_ = new PCRE("("+re+")", o);
267*ccdc9c3eSSadaf Ebrahimi if (!re_->error().empty()) {
268*ccdc9c3eSSadaf Ebrahimi LOG(INFO) << "Cannot PCRE: " << CEscape(re);
269*ccdc9c3eSSadaf Ebrahimi error_ = true;
270*ccdc9c3eSSadaf Ebrahimi return;
271*ccdc9c3eSSadaf Ebrahimi }
272*ccdc9c3eSSadaf Ebrahimi }
273*ccdc9c3eSSadaf Ebrahimi }
274*ccdc9c3eSSadaf Ebrahimi
~TestInstance()275*ccdc9c3eSSadaf Ebrahimi TestInstance::~TestInstance() {
276*ccdc9c3eSSadaf Ebrahimi if (regexp_)
277*ccdc9c3eSSadaf Ebrahimi regexp_->Decref();
278*ccdc9c3eSSadaf Ebrahimi delete prog_;
279*ccdc9c3eSSadaf Ebrahimi delete rprog_;
280*ccdc9c3eSSadaf Ebrahimi delete re_;
281*ccdc9c3eSSadaf Ebrahimi delete re2_;
282*ccdc9c3eSSadaf Ebrahimi }
283*ccdc9c3eSSadaf Ebrahimi
284*ccdc9c3eSSadaf Ebrahimi // Runs a single search using the named engine type.
285*ccdc9c3eSSadaf Ebrahimi // This interface hides all the irregularities of the various
286*ccdc9c3eSSadaf Ebrahimi // engine interfaces from the rest of this file.
RunSearch(Engine type,const StringPiece & orig_text,const StringPiece & orig_context,Prog::Anchor anchor,Result * result)287*ccdc9c3eSSadaf Ebrahimi void TestInstance::RunSearch(Engine type,
288*ccdc9c3eSSadaf Ebrahimi const StringPiece& orig_text,
289*ccdc9c3eSSadaf Ebrahimi const StringPiece& orig_context,
290*ccdc9c3eSSadaf Ebrahimi Prog::Anchor anchor,
291*ccdc9c3eSSadaf Ebrahimi Result* result) {
292*ccdc9c3eSSadaf Ebrahimi // Result is not trivial, so we cannot freely clear it with memset(3),
293*ccdc9c3eSSadaf Ebrahimi // but zeroing objects like so is safe and expedient for our purposes.
294*ccdc9c3eSSadaf Ebrahimi memset(reinterpret_cast<void*>(result), 0, sizeof *result);
295*ccdc9c3eSSadaf Ebrahimi if (regexp_ == NULL) {
296*ccdc9c3eSSadaf Ebrahimi result->skipped = true;
297*ccdc9c3eSSadaf Ebrahimi return;
298*ccdc9c3eSSadaf Ebrahimi }
299*ccdc9c3eSSadaf Ebrahimi int nsubmatch = 1 + num_captures_; // NumCaptures doesn't count $0
300*ccdc9c3eSSadaf Ebrahimi if (nsubmatch > kMaxSubmatch)
301*ccdc9c3eSSadaf Ebrahimi nsubmatch = kMaxSubmatch;
302*ccdc9c3eSSadaf Ebrahimi
303*ccdc9c3eSSadaf Ebrahimi StringPiece text = orig_text;
304*ccdc9c3eSSadaf Ebrahimi StringPiece context = orig_context;
305*ccdc9c3eSSadaf Ebrahimi
306*ccdc9c3eSSadaf Ebrahimi switch (type) {
307*ccdc9c3eSSadaf Ebrahimi default:
308*ccdc9c3eSSadaf Ebrahimi LOG(FATAL) << "Bad RunSearch type: " << (int)type;
309*ccdc9c3eSSadaf Ebrahimi
310*ccdc9c3eSSadaf Ebrahimi case kEngineBacktrack:
311*ccdc9c3eSSadaf Ebrahimi if (prog_ == NULL) {
312*ccdc9c3eSSadaf Ebrahimi result->skipped = true;
313*ccdc9c3eSSadaf Ebrahimi break;
314*ccdc9c3eSSadaf Ebrahimi }
315*ccdc9c3eSSadaf Ebrahimi result->matched =
316*ccdc9c3eSSadaf Ebrahimi prog_->UnsafeSearchBacktrack(text, context, anchor, kind_,
317*ccdc9c3eSSadaf Ebrahimi result->submatch, nsubmatch);
318*ccdc9c3eSSadaf Ebrahimi result->have_submatch = true;
319*ccdc9c3eSSadaf Ebrahimi break;
320*ccdc9c3eSSadaf Ebrahimi
321*ccdc9c3eSSadaf Ebrahimi case kEngineNFA:
322*ccdc9c3eSSadaf Ebrahimi if (prog_ == NULL) {
323*ccdc9c3eSSadaf Ebrahimi result->skipped = true;
324*ccdc9c3eSSadaf Ebrahimi break;
325*ccdc9c3eSSadaf Ebrahimi }
326*ccdc9c3eSSadaf Ebrahimi result->matched =
327*ccdc9c3eSSadaf Ebrahimi prog_->SearchNFA(text, context, anchor, kind_,
328*ccdc9c3eSSadaf Ebrahimi result->submatch, nsubmatch);
329*ccdc9c3eSSadaf Ebrahimi result->have_submatch = true;
330*ccdc9c3eSSadaf Ebrahimi break;
331*ccdc9c3eSSadaf Ebrahimi
332*ccdc9c3eSSadaf Ebrahimi case kEngineDFA:
333*ccdc9c3eSSadaf Ebrahimi if (prog_ == NULL) {
334*ccdc9c3eSSadaf Ebrahimi result->skipped = true;
335*ccdc9c3eSSadaf Ebrahimi break;
336*ccdc9c3eSSadaf Ebrahimi }
337*ccdc9c3eSSadaf Ebrahimi result->matched = prog_->SearchDFA(text, context, anchor, kind_, NULL,
338*ccdc9c3eSSadaf Ebrahimi &result->skipped, NULL);
339*ccdc9c3eSSadaf Ebrahimi break;
340*ccdc9c3eSSadaf Ebrahimi
341*ccdc9c3eSSadaf Ebrahimi case kEngineDFA1:
342*ccdc9c3eSSadaf Ebrahimi if (prog_ == NULL || rprog_ == NULL) {
343*ccdc9c3eSSadaf Ebrahimi result->skipped = true;
344*ccdc9c3eSSadaf Ebrahimi break;
345*ccdc9c3eSSadaf Ebrahimi }
346*ccdc9c3eSSadaf Ebrahimi result->matched =
347*ccdc9c3eSSadaf Ebrahimi prog_->SearchDFA(text, context, anchor, kind_, result->submatch,
348*ccdc9c3eSSadaf Ebrahimi &result->skipped, NULL);
349*ccdc9c3eSSadaf Ebrahimi // If anchored, no need for second run,
350*ccdc9c3eSSadaf Ebrahimi // but do it anyway to find more bugs.
351*ccdc9c3eSSadaf Ebrahimi if (result->matched) {
352*ccdc9c3eSSadaf Ebrahimi if (!rprog_->SearchDFA(result->submatch[0], context,
353*ccdc9c3eSSadaf Ebrahimi Prog::kAnchored, Prog::kLongestMatch,
354*ccdc9c3eSSadaf Ebrahimi result->submatch,
355*ccdc9c3eSSadaf Ebrahimi &result->skipped, NULL)) {
356*ccdc9c3eSSadaf Ebrahimi LOG(ERROR) << "Reverse DFA inconsistency: "
357*ccdc9c3eSSadaf Ebrahimi << CEscape(regexp_str_)
358*ccdc9c3eSSadaf Ebrahimi << " on " << CEscape(text);
359*ccdc9c3eSSadaf Ebrahimi result->matched = false;
360*ccdc9c3eSSadaf Ebrahimi }
361*ccdc9c3eSSadaf Ebrahimi }
362*ccdc9c3eSSadaf Ebrahimi result->have_submatch0 = true;
363*ccdc9c3eSSadaf Ebrahimi break;
364*ccdc9c3eSSadaf Ebrahimi
365*ccdc9c3eSSadaf Ebrahimi case kEngineOnePass:
366*ccdc9c3eSSadaf Ebrahimi if (prog_ == NULL ||
367*ccdc9c3eSSadaf Ebrahimi anchor == Prog::kUnanchored ||
368*ccdc9c3eSSadaf Ebrahimi !prog_->IsOnePass() ||
369*ccdc9c3eSSadaf Ebrahimi nsubmatch > Prog::kMaxOnePassCapture) {
370*ccdc9c3eSSadaf Ebrahimi result->skipped = true;
371*ccdc9c3eSSadaf Ebrahimi break;
372*ccdc9c3eSSadaf Ebrahimi }
373*ccdc9c3eSSadaf Ebrahimi result->matched = prog_->SearchOnePass(text, context, anchor, kind_,
374*ccdc9c3eSSadaf Ebrahimi result->submatch, nsubmatch);
375*ccdc9c3eSSadaf Ebrahimi result->have_submatch = true;
376*ccdc9c3eSSadaf Ebrahimi break;
377*ccdc9c3eSSadaf Ebrahimi
378*ccdc9c3eSSadaf Ebrahimi case kEngineBitState:
379*ccdc9c3eSSadaf Ebrahimi if (prog_ == NULL) {
380*ccdc9c3eSSadaf Ebrahimi result->skipped = true;
381*ccdc9c3eSSadaf Ebrahimi break;
382*ccdc9c3eSSadaf Ebrahimi }
383*ccdc9c3eSSadaf Ebrahimi result->matched = prog_->SearchBitState(text, context, anchor, kind_,
384*ccdc9c3eSSadaf Ebrahimi result->submatch, nsubmatch);
385*ccdc9c3eSSadaf Ebrahimi result->have_submatch = true;
386*ccdc9c3eSSadaf Ebrahimi break;
387*ccdc9c3eSSadaf Ebrahimi
388*ccdc9c3eSSadaf Ebrahimi case kEngineRE2:
389*ccdc9c3eSSadaf Ebrahimi case kEngineRE2a:
390*ccdc9c3eSSadaf Ebrahimi case kEngineRE2b: {
391*ccdc9c3eSSadaf Ebrahimi if (!re2_ || text.end() != context.end()) {
392*ccdc9c3eSSadaf Ebrahimi result->skipped = true;
393*ccdc9c3eSSadaf Ebrahimi break;
394*ccdc9c3eSSadaf Ebrahimi }
395*ccdc9c3eSSadaf Ebrahimi
396*ccdc9c3eSSadaf Ebrahimi RE2::Anchor re_anchor;
397*ccdc9c3eSSadaf Ebrahimi if (anchor == Prog::kAnchored)
398*ccdc9c3eSSadaf Ebrahimi re_anchor = RE2::ANCHOR_START;
399*ccdc9c3eSSadaf Ebrahimi else
400*ccdc9c3eSSadaf Ebrahimi re_anchor = RE2::UNANCHORED;
401*ccdc9c3eSSadaf Ebrahimi if (kind_ == Prog::kFullMatch)
402*ccdc9c3eSSadaf Ebrahimi re_anchor = RE2::ANCHOR_BOTH;
403*ccdc9c3eSSadaf Ebrahimi
404*ccdc9c3eSSadaf Ebrahimi result->matched = re2_->Match(
405*ccdc9c3eSSadaf Ebrahimi context,
406*ccdc9c3eSSadaf Ebrahimi static_cast<size_t>(text.begin() - context.begin()),
407*ccdc9c3eSSadaf Ebrahimi static_cast<size_t>(text.end() - context.begin()),
408*ccdc9c3eSSadaf Ebrahimi re_anchor,
409*ccdc9c3eSSadaf Ebrahimi result->submatch,
410*ccdc9c3eSSadaf Ebrahimi nsubmatch);
411*ccdc9c3eSSadaf Ebrahimi result->have_submatch = nsubmatch > 0;
412*ccdc9c3eSSadaf Ebrahimi break;
413*ccdc9c3eSSadaf Ebrahimi }
414*ccdc9c3eSSadaf Ebrahimi
415*ccdc9c3eSSadaf Ebrahimi case kEnginePCRE: {
416*ccdc9c3eSSadaf Ebrahimi if (!re_ || text.begin() != context.begin() ||
417*ccdc9c3eSSadaf Ebrahimi text.end() != context.end()) {
418*ccdc9c3eSSadaf Ebrahimi result->skipped = true;
419*ccdc9c3eSSadaf Ebrahimi break;
420*ccdc9c3eSSadaf Ebrahimi }
421*ccdc9c3eSSadaf Ebrahimi
422*ccdc9c3eSSadaf Ebrahimi // In Perl/PCRE, \v matches any character considered vertical
423*ccdc9c3eSSadaf Ebrahimi // whitespace, not just vertical tab. Regexp::MimicsPCRE() is
424*ccdc9c3eSSadaf Ebrahimi // unable to handle all cases of this, unfortunately, so just
425*ccdc9c3eSSadaf Ebrahimi // catch them here. :(
426*ccdc9c3eSSadaf Ebrahimi if (regexp_str_.find("\\v") != StringPiece::npos &&
427*ccdc9c3eSSadaf Ebrahimi (text.find('\n') != StringPiece::npos ||
428*ccdc9c3eSSadaf Ebrahimi text.find('\f') != StringPiece::npos ||
429*ccdc9c3eSSadaf Ebrahimi text.find('\r') != StringPiece::npos)) {
430*ccdc9c3eSSadaf Ebrahimi result->skipped = true;
431*ccdc9c3eSSadaf Ebrahimi break;
432*ccdc9c3eSSadaf Ebrahimi }
433*ccdc9c3eSSadaf Ebrahimi
434*ccdc9c3eSSadaf Ebrahimi // PCRE 8.34 or so started allowing vertical tab to match \s,
435*ccdc9c3eSSadaf Ebrahimi // following a change made in Perl 5.18. RE2 does not.
436*ccdc9c3eSSadaf Ebrahimi if ((regexp_str_.find("\\s") != StringPiece::npos ||
437*ccdc9c3eSSadaf Ebrahimi regexp_str_.find("\\S") != StringPiece::npos) &&
438*ccdc9c3eSSadaf Ebrahimi text.find('\v') != StringPiece::npos) {
439*ccdc9c3eSSadaf Ebrahimi result->skipped = true;
440*ccdc9c3eSSadaf Ebrahimi break;
441*ccdc9c3eSSadaf Ebrahimi }
442*ccdc9c3eSSadaf Ebrahimi
443*ccdc9c3eSSadaf Ebrahimi const PCRE::Arg **argptr = new const PCRE::Arg*[nsubmatch];
444*ccdc9c3eSSadaf Ebrahimi PCRE::Arg *a = new PCRE::Arg[nsubmatch];
445*ccdc9c3eSSadaf Ebrahimi for (int i = 0; i < nsubmatch; i++) {
446*ccdc9c3eSSadaf Ebrahimi a[i] = PCRE::Arg(&result->submatch[i]);
447*ccdc9c3eSSadaf Ebrahimi argptr[i] = &a[i];
448*ccdc9c3eSSadaf Ebrahimi }
449*ccdc9c3eSSadaf Ebrahimi size_t consumed;
450*ccdc9c3eSSadaf Ebrahimi PCRE::Anchor pcre_anchor;
451*ccdc9c3eSSadaf Ebrahimi if (anchor == Prog::kAnchored)
452*ccdc9c3eSSadaf Ebrahimi pcre_anchor = PCRE::ANCHOR_START;
453*ccdc9c3eSSadaf Ebrahimi else
454*ccdc9c3eSSadaf Ebrahimi pcre_anchor = PCRE::UNANCHORED;
455*ccdc9c3eSSadaf Ebrahimi if (kind_ == Prog::kFullMatch)
456*ccdc9c3eSSadaf Ebrahimi pcre_anchor = PCRE::ANCHOR_BOTH;
457*ccdc9c3eSSadaf Ebrahimi re_->ClearHitLimit();
458*ccdc9c3eSSadaf Ebrahimi result->matched =
459*ccdc9c3eSSadaf Ebrahimi re_->DoMatch(text,
460*ccdc9c3eSSadaf Ebrahimi pcre_anchor,
461*ccdc9c3eSSadaf Ebrahimi &consumed,
462*ccdc9c3eSSadaf Ebrahimi argptr, nsubmatch);
463*ccdc9c3eSSadaf Ebrahimi if (re_->HitLimit()) {
464*ccdc9c3eSSadaf Ebrahimi result->untrusted = true;
465*ccdc9c3eSSadaf Ebrahimi delete[] argptr;
466*ccdc9c3eSSadaf Ebrahimi delete[] a;
467*ccdc9c3eSSadaf Ebrahimi break;
468*ccdc9c3eSSadaf Ebrahimi }
469*ccdc9c3eSSadaf Ebrahimi result->have_submatch = true;
470*ccdc9c3eSSadaf Ebrahimi delete[] argptr;
471*ccdc9c3eSSadaf Ebrahimi delete[] a;
472*ccdc9c3eSSadaf Ebrahimi break;
473*ccdc9c3eSSadaf Ebrahimi }
474*ccdc9c3eSSadaf Ebrahimi }
475*ccdc9c3eSSadaf Ebrahimi
476*ccdc9c3eSSadaf Ebrahimi if (!result->matched)
477*ccdc9c3eSSadaf Ebrahimi memset(result->submatch, 0, sizeof result->submatch);
478*ccdc9c3eSSadaf Ebrahimi }
479*ccdc9c3eSSadaf Ebrahimi
480*ccdc9c3eSSadaf Ebrahimi // Checks whether r is okay given that correct is the right answer.
481*ccdc9c3eSSadaf Ebrahimi // Specifically, r's answers have to match (but it doesn't have to
482*ccdc9c3eSSadaf Ebrahimi // claim to have all the answers).
ResultOkay(const Result & r,const Result & correct)483*ccdc9c3eSSadaf Ebrahimi static bool ResultOkay(const Result& r, const Result& correct) {
484*ccdc9c3eSSadaf Ebrahimi if (r.skipped)
485*ccdc9c3eSSadaf Ebrahimi return true;
486*ccdc9c3eSSadaf Ebrahimi if (r.matched != correct.matched)
487*ccdc9c3eSSadaf Ebrahimi return false;
488*ccdc9c3eSSadaf Ebrahimi if (r.have_submatch || r.have_submatch0) {
489*ccdc9c3eSSadaf Ebrahimi for (int i = 0; i < kMaxSubmatch; i++) {
490*ccdc9c3eSSadaf Ebrahimi if (correct.submatch[i].begin() != r.submatch[i].begin() ||
491*ccdc9c3eSSadaf Ebrahimi correct.submatch[i].size() != r.submatch[i].size())
492*ccdc9c3eSSadaf Ebrahimi return false;
493*ccdc9c3eSSadaf Ebrahimi if (!r.have_submatch)
494*ccdc9c3eSSadaf Ebrahimi break;
495*ccdc9c3eSSadaf Ebrahimi }
496*ccdc9c3eSSadaf Ebrahimi }
497*ccdc9c3eSSadaf Ebrahimi return true;
498*ccdc9c3eSSadaf Ebrahimi }
499*ccdc9c3eSSadaf Ebrahimi
500*ccdc9c3eSSadaf Ebrahimi // Runs a single test.
RunCase(const StringPiece & text,const StringPiece & context,Prog::Anchor anchor)501*ccdc9c3eSSadaf Ebrahimi bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
502*ccdc9c3eSSadaf Ebrahimi Prog::Anchor anchor) {
503*ccdc9c3eSSadaf Ebrahimi // Backtracking is the gold standard.
504*ccdc9c3eSSadaf Ebrahimi Result correct;
505*ccdc9c3eSSadaf Ebrahimi RunSearch(kEngineBacktrack, text, context, anchor, &correct);
506*ccdc9c3eSSadaf Ebrahimi if (correct.skipped) {
507*ccdc9c3eSSadaf Ebrahimi if (regexp_ == NULL)
508*ccdc9c3eSSadaf Ebrahimi return true;
509*ccdc9c3eSSadaf Ebrahimi LOG(ERROR) << "Skipped backtracking! " << CEscape(regexp_str_)
510*ccdc9c3eSSadaf Ebrahimi << " " << FormatMode(flags_);
511*ccdc9c3eSSadaf Ebrahimi return false;
512*ccdc9c3eSSadaf Ebrahimi }
513*ccdc9c3eSSadaf Ebrahimi VLOG(1) << "Try: regexp " << CEscape(regexp_str_)
514*ccdc9c3eSSadaf Ebrahimi << " text " << CEscape(text)
515*ccdc9c3eSSadaf Ebrahimi << " (" << FormatKind(kind_)
516*ccdc9c3eSSadaf Ebrahimi << ", " << FormatAnchor(anchor)
517*ccdc9c3eSSadaf Ebrahimi << ", " << FormatMode(flags_)
518*ccdc9c3eSSadaf Ebrahimi << ")";
519*ccdc9c3eSSadaf Ebrahimi
520*ccdc9c3eSSadaf Ebrahimi // Compare the others.
521*ccdc9c3eSSadaf Ebrahimi bool all_okay = true;
522*ccdc9c3eSSadaf Ebrahimi for (Engine i = kEngineBacktrack+1; i < kEngineMax; i++) {
523*ccdc9c3eSSadaf Ebrahimi if (!(Engines() & (1<<i)))
524*ccdc9c3eSSadaf Ebrahimi continue;
525*ccdc9c3eSSadaf Ebrahimi
526*ccdc9c3eSSadaf Ebrahimi Result r;
527*ccdc9c3eSSadaf Ebrahimi RunSearch(i, text, context, anchor, &r);
528*ccdc9c3eSSadaf Ebrahimi if (ResultOkay(r, correct)) {
529*ccdc9c3eSSadaf Ebrahimi if (FLAGS_log_okay)
530*ccdc9c3eSSadaf Ebrahimi LogMatch(r.skipped ? "Skipped: " : "Okay: ", i, text, context, anchor);
531*ccdc9c3eSSadaf Ebrahimi continue;
532*ccdc9c3eSSadaf Ebrahimi }
533*ccdc9c3eSSadaf Ebrahimi
534*ccdc9c3eSSadaf Ebrahimi // We disagree with PCRE on the meaning of some Unicode matches.
535*ccdc9c3eSSadaf Ebrahimi // In particular, we treat non-ASCII UTF-8 as non-word characters.
536*ccdc9c3eSSadaf Ebrahimi // We also treat "empty" character sets like [^\w\W] as being
537*ccdc9c3eSSadaf Ebrahimi // impossible to match, while PCRE apparently excludes some code
538*ccdc9c3eSSadaf Ebrahimi // points (e.g., 0x0080) from both \w and \W.
539*ccdc9c3eSSadaf Ebrahimi if (i == kEnginePCRE && NonASCII(text))
540*ccdc9c3eSSadaf Ebrahimi continue;
541*ccdc9c3eSSadaf Ebrahimi
542*ccdc9c3eSSadaf Ebrahimi if (!r.untrusted)
543*ccdc9c3eSSadaf Ebrahimi all_okay = false;
544*ccdc9c3eSSadaf Ebrahimi
545*ccdc9c3eSSadaf Ebrahimi LogMatch(r.untrusted ? "(Untrusted) Mismatch: " : "Mismatch: ", i, text,
546*ccdc9c3eSSadaf Ebrahimi context, anchor);
547*ccdc9c3eSSadaf Ebrahimi if (r.matched != correct.matched) {
548*ccdc9c3eSSadaf Ebrahimi if (r.matched) {
549*ccdc9c3eSSadaf Ebrahimi LOG(INFO) << " Should not match (but does).";
550*ccdc9c3eSSadaf Ebrahimi } else {
551*ccdc9c3eSSadaf Ebrahimi LOG(INFO) << " Should match (but does not).";
552*ccdc9c3eSSadaf Ebrahimi continue;
553*ccdc9c3eSSadaf Ebrahimi }
554*ccdc9c3eSSadaf Ebrahimi }
555*ccdc9c3eSSadaf Ebrahimi for (int i = 0; i < 1+num_captures_; i++) {
556*ccdc9c3eSSadaf Ebrahimi if (r.submatch[i].begin() != correct.submatch[i].begin() ||
557*ccdc9c3eSSadaf Ebrahimi r.submatch[i].end() != correct.submatch[i].end()) {
558*ccdc9c3eSSadaf Ebrahimi LOG(INFO) <<
559*ccdc9c3eSSadaf Ebrahimi StringPrintf(" $%d: should be %s is %s",
560*ccdc9c3eSSadaf Ebrahimi i,
561*ccdc9c3eSSadaf Ebrahimi FormatCapture(text, correct.submatch[i]).c_str(),
562*ccdc9c3eSSadaf Ebrahimi FormatCapture(text, r.submatch[i]).c_str());
563*ccdc9c3eSSadaf Ebrahimi } else {
564*ccdc9c3eSSadaf Ebrahimi LOG(INFO) <<
565*ccdc9c3eSSadaf Ebrahimi StringPrintf(" $%d: %s ok", i,
566*ccdc9c3eSSadaf Ebrahimi FormatCapture(text, r.submatch[i]).c_str());
567*ccdc9c3eSSadaf Ebrahimi }
568*ccdc9c3eSSadaf Ebrahimi }
569*ccdc9c3eSSadaf Ebrahimi }
570*ccdc9c3eSSadaf Ebrahimi
571*ccdc9c3eSSadaf Ebrahimi if (!all_okay) {
572*ccdc9c3eSSadaf Ebrahimi if (FLAGS_max_regexp_failures > 0 && --FLAGS_max_regexp_failures == 0)
573*ccdc9c3eSSadaf Ebrahimi LOG(QFATAL) << "Too many regexp failures.";
574*ccdc9c3eSSadaf Ebrahimi }
575*ccdc9c3eSSadaf Ebrahimi
576*ccdc9c3eSSadaf Ebrahimi return all_okay;
577*ccdc9c3eSSadaf Ebrahimi }
578*ccdc9c3eSSadaf Ebrahimi
LogMatch(const char * prefix,Engine e,const StringPiece & text,const StringPiece & context,Prog::Anchor anchor)579*ccdc9c3eSSadaf Ebrahimi void TestInstance::LogMatch(const char* prefix, Engine e,
580*ccdc9c3eSSadaf Ebrahimi const StringPiece& text, const StringPiece& context,
581*ccdc9c3eSSadaf Ebrahimi Prog::Anchor anchor) {
582*ccdc9c3eSSadaf Ebrahimi LOG(INFO) << prefix
583*ccdc9c3eSSadaf Ebrahimi << EngineName(e)
584*ccdc9c3eSSadaf Ebrahimi << " regexp "
585*ccdc9c3eSSadaf Ebrahimi << CEscape(regexp_str_)
586*ccdc9c3eSSadaf Ebrahimi << " "
587*ccdc9c3eSSadaf Ebrahimi << CEscape(regexp_->ToString())
588*ccdc9c3eSSadaf Ebrahimi << " text "
589*ccdc9c3eSSadaf Ebrahimi << CEscape(text)
590*ccdc9c3eSSadaf Ebrahimi << " ("
591*ccdc9c3eSSadaf Ebrahimi << text.begin() - context.begin()
592*ccdc9c3eSSadaf Ebrahimi << ","
593*ccdc9c3eSSadaf Ebrahimi << text.end() - context.begin()
594*ccdc9c3eSSadaf Ebrahimi << ") of context "
595*ccdc9c3eSSadaf Ebrahimi << CEscape(context)
596*ccdc9c3eSSadaf Ebrahimi << " (" << FormatKind(kind_)
597*ccdc9c3eSSadaf Ebrahimi << ", " << FormatAnchor(anchor)
598*ccdc9c3eSSadaf Ebrahimi << ", " << FormatMode(flags_)
599*ccdc9c3eSSadaf Ebrahimi << ")";
600*ccdc9c3eSSadaf Ebrahimi }
601*ccdc9c3eSSadaf Ebrahimi
602*ccdc9c3eSSadaf Ebrahimi static Prog::MatchKind kinds[] = {
603*ccdc9c3eSSadaf Ebrahimi Prog::kFirstMatch,
604*ccdc9c3eSSadaf Ebrahimi Prog::kLongestMatch,
605*ccdc9c3eSSadaf Ebrahimi Prog::kFullMatch,
606*ccdc9c3eSSadaf Ebrahimi };
607*ccdc9c3eSSadaf Ebrahimi
608*ccdc9c3eSSadaf Ebrahimi // Test all possible match kinds and parse modes.
Tester(const StringPiece & regexp)609*ccdc9c3eSSadaf Ebrahimi Tester::Tester(const StringPiece& regexp) {
610*ccdc9c3eSSadaf Ebrahimi error_ = false;
611*ccdc9c3eSSadaf Ebrahimi for (int i = 0; i < arraysize(kinds); i++) {
612*ccdc9c3eSSadaf Ebrahimi for (int j = 0; j < arraysize(parse_modes); j++) {
613*ccdc9c3eSSadaf Ebrahimi TestInstance* t = new TestInstance(regexp, kinds[i],
614*ccdc9c3eSSadaf Ebrahimi parse_modes[j].parse_flags);
615*ccdc9c3eSSadaf Ebrahimi error_ |= t->error();
616*ccdc9c3eSSadaf Ebrahimi v_.push_back(t);
617*ccdc9c3eSSadaf Ebrahimi }
618*ccdc9c3eSSadaf Ebrahimi }
619*ccdc9c3eSSadaf Ebrahimi }
620*ccdc9c3eSSadaf Ebrahimi
~Tester()621*ccdc9c3eSSadaf Ebrahimi Tester::~Tester() {
622*ccdc9c3eSSadaf Ebrahimi for (size_t i = 0; i < v_.size(); i++)
623*ccdc9c3eSSadaf Ebrahimi delete v_[i];
624*ccdc9c3eSSadaf Ebrahimi }
625*ccdc9c3eSSadaf Ebrahimi
TestCase(const StringPiece & text,const StringPiece & context,Prog::Anchor anchor)626*ccdc9c3eSSadaf Ebrahimi bool Tester::TestCase(const StringPiece& text, const StringPiece& context,
627*ccdc9c3eSSadaf Ebrahimi Prog::Anchor anchor) {
628*ccdc9c3eSSadaf Ebrahimi bool okay = true;
629*ccdc9c3eSSadaf Ebrahimi for (size_t i = 0; i < v_.size(); i++)
630*ccdc9c3eSSadaf Ebrahimi okay &= (!v_[i]->error() && v_[i]->RunCase(text, context, anchor));
631*ccdc9c3eSSadaf Ebrahimi return okay;
632*ccdc9c3eSSadaf Ebrahimi }
633*ccdc9c3eSSadaf Ebrahimi
634*ccdc9c3eSSadaf Ebrahimi static Prog::Anchor anchors[] = {
635*ccdc9c3eSSadaf Ebrahimi Prog::kAnchored,
636*ccdc9c3eSSadaf Ebrahimi Prog::kUnanchored
637*ccdc9c3eSSadaf Ebrahimi };
638*ccdc9c3eSSadaf Ebrahimi
TestInput(const StringPiece & text)639*ccdc9c3eSSadaf Ebrahimi bool Tester::TestInput(const StringPiece& text) {
640*ccdc9c3eSSadaf Ebrahimi bool okay = TestInputInContext(text, text);
641*ccdc9c3eSSadaf Ebrahimi if (text.size() > 0) {
642*ccdc9c3eSSadaf Ebrahimi StringPiece sp;
643*ccdc9c3eSSadaf Ebrahimi sp = text;
644*ccdc9c3eSSadaf Ebrahimi sp.remove_prefix(1);
645*ccdc9c3eSSadaf Ebrahimi okay &= TestInputInContext(sp, text);
646*ccdc9c3eSSadaf Ebrahimi sp = text;
647*ccdc9c3eSSadaf Ebrahimi sp.remove_suffix(1);
648*ccdc9c3eSSadaf Ebrahimi okay &= TestInputInContext(sp, text);
649*ccdc9c3eSSadaf Ebrahimi }
650*ccdc9c3eSSadaf Ebrahimi return okay;
651*ccdc9c3eSSadaf Ebrahimi }
652*ccdc9c3eSSadaf Ebrahimi
TestInputInContext(const StringPiece & text,const StringPiece & context)653*ccdc9c3eSSadaf Ebrahimi bool Tester::TestInputInContext(const StringPiece& text,
654*ccdc9c3eSSadaf Ebrahimi const StringPiece& context) {
655*ccdc9c3eSSadaf Ebrahimi bool okay = true;
656*ccdc9c3eSSadaf Ebrahimi for (int i = 0; i < arraysize(anchors); i++)
657*ccdc9c3eSSadaf Ebrahimi okay &= TestCase(text, context, anchors[i]);
658*ccdc9c3eSSadaf Ebrahimi return okay;
659*ccdc9c3eSSadaf Ebrahimi }
660*ccdc9c3eSSadaf Ebrahimi
TestRegexpOnText(const StringPiece & regexp,const StringPiece & text)661*ccdc9c3eSSadaf Ebrahimi bool TestRegexpOnText(const StringPiece& regexp,
662*ccdc9c3eSSadaf Ebrahimi const StringPiece& text) {
663*ccdc9c3eSSadaf Ebrahimi Tester t(regexp);
664*ccdc9c3eSSadaf Ebrahimi return t.TestInput(text);
665*ccdc9c3eSSadaf Ebrahimi }
666*ccdc9c3eSSadaf Ebrahimi
667*ccdc9c3eSSadaf Ebrahimi } // namespace re2
668