xref: /aosp_15_r20/external/regex-re2/re2/testing/tester.cc (revision ccdc9c3e24c519bfa4832a66aa2e83a52c19f295)
1*ccdc9c3eSSadaf Ebrahimi // Copyright 2008 The RE2 Authors.  All Rights Reserved.
2*ccdc9c3eSSadaf Ebrahimi // Use of this source code is governed by a BSD-style
3*ccdc9c3eSSadaf Ebrahimi // license that can be found in the LICENSE file.
4*ccdc9c3eSSadaf Ebrahimi 
5*ccdc9c3eSSadaf Ebrahimi // Regular expression engine tester -- test all the implementations against each other.
6*ccdc9c3eSSadaf Ebrahimi 
7*ccdc9c3eSSadaf Ebrahimi #include <stddef.h>
8*ccdc9c3eSSadaf Ebrahimi #include <stdint.h>
9*ccdc9c3eSSadaf Ebrahimi #include <string.h>
10*ccdc9c3eSSadaf Ebrahimi #include <string>
11*ccdc9c3eSSadaf Ebrahimi 
12*ccdc9c3eSSadaf Ebrahimi #include "util/util.h"
13*ccdc9c3eSSadaf Ebrahimi #include "util/flags.h"
14*ccdc9c3eSSadaf Ebrahimi #include "util/logging.h"
15*ccdc9c3eSSadaf Ebrahimi #include "util/strutil.h"
16*ccdc9c3eSSadaf Ebrahimi #include "re2/testing/tester.h"
17*ccdc9c3eSSadaf Ebrahimi #include "re2/prog.h"
18*ccdc9c3eSSadaf Ebrahimi #include "re2/re2.h"
19*ccdc9c3eSSadaf Ebrahimi #include "re2/regexp.h"
20*ccdc9c3eSSadaf Ebrahimi 
// Command-line flags for the regexp tester.

// When set, the TestInstance constructor logs the compiled forward program.
DEFINE_bool(dump_prog, false, "dump regexp program");
// When set, RunCase also logs engine runs that were okay or skipped.
DEFINE_bool(log_okay, false, "log successful runs");
// When set, the TestInstance constructor logs the compiled reverse program.
DEFINE_bool(dump_rprog, false, "dump reversed regexp program");

// Failure budget; RunCase decrements it when a test case has a mismatch.
DEFINE_int32(max_regexp_failures, 100,
             "maximum number of regexp test failures (-1 = unlimited)");

// An engine is tested when its name occurs as a substring of this value;
// an empty value selects every engine (see Engines()).
DEFINE_string(regexp_engines, "", "pattern to select regexp engines to test");
30*ccdc9c3eSSadaf Ebrahimi namespace re2 {
31*ccdc9c3eSSadaf Ebrahimi 
// Capacity of the per-search submatch array: the overall match $0
// plus capture groups $1 through $16.
enum {
  kMaxSubmatch = 1 + 16,
};
35*ccdc9c3eSSadaf Ebrahimi 
// Printable engine names, looked up by Engine value in EngineName().
// NOTE(review): presumably listed in the same order as the Engine enum
// declared in tester.h -- confirm when adding an engine.
const char* engine_names[kEngineMax] = {
  "Backtrack",
  "NFA",
  "DFA",
  "DFA1",
  "OnePass",
  "BitState",
  "RE2",
  "RE2a",
  "RE2b",
  "PCRE",
};
48*ccdc9c3eSSadaf Ebrahimi 
49*ccdc9c3eSSadaf Ebrahimi // Returns the name of the engine.
EngineName(Engine e)50*ccdc9c3eSSadaf Ebrahimi static const char* EngineName(Engine e) {
51*ccdc9c3eSSadaf Ebrahimi   CHECK_GE(e, 0);
52*ccdc9c3eSSadaf Ebrahimi   CHECK_LT(e, arraysize(engine_names));
53*ccdc9c3eSSadaf Ebrahimi   CHECK(engine_names[e] != NULL);
54*ccdc9c3eSSadaf Ebrahimi   return engine_names[e];
55*ccdc9c3eSSadaf Ebrahimi }
56*ccdc9c3eSSadaf Ebrahimi 
57*ccdc9c3eSSadaf Ebrahimi // Returns bit mask of engines to use.
Engines()58*ccdc9c3eSSadaf Ebrahimi static uint32_t Engines() {
59*ccdc9c3eSSadaf Ebrahimi   static bool did_parse = false;
60*ccdc9c3eSSadaf Ebrahimi   static uint32_t cached_engines = 0;
61*ccdc9c3eSSadaf Ebrahimi 
62*ccdc9c3eSSadaf Ebrahimi   if (did_parse)
63*ccdc9c3eSSadaf Ebrahimi     return cached_engines;
64*ccdc9c3eSSadaf Ebrahimi 
65*ccdc9c3eSSadaf Ebrahimi   if (FLAGS_regexp_engines.empty()) {
66*ccdc9c3eSSadaf Ebrahimi     cached_engines = ~0;
67*ccdc9c3eSSadaf Ebrahimi   } else {
68*ccdc9c3eSSadaf Ebrahimi     for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++)
69*ccdc9c3eSSadaf Ebrahimi       if (FLAGS_regexp_engines.find(EngineName(i)) != string::npos)
70*ccdc9c3eSSadaf Ebrahimi         cached_engines |= 1<<i;
71*ccdc9c3eSSadaf Ebrahimi   }
72*ccdc9c3eSSadaf Ebrahimi 
73*ccdc9c3eSSadaf Ebrahimi   if (cached_engines == 0)
74*ccdc9c3eSSadaf Ebrahimi     LOG(INFO) << "Warning: no engines enabled.";
75*ccdc9c3eSSadaf Ebrahimi   if (!UsingPCRE)
76*ccdc9c3eSSadaf Ebrahimi     cached_engines &= ~(1<<kEnginePCRE);
77*ccdc9c3eSSadaf Ebrahimi   for (Engine i = static_cast<Engine>(0); i < kEngineMax; i++) {
78*ccdc9c3eSSadaf Ebrahimi     if (cached_engines & (1<<i))
79*ccdc9c3eSSadaf Ebrahimi       LOG(INFO) << EngineName(i) << " enabled";
80*ccdc9c3eSSadaf Ebrahimi   }
81*ccdc9c3eSSadaf Ebrahimi 
82*ccdc9c3eSSadaf Ebrahimi   did_parse = true;
83*ccdc9c3eSSadaf Ebrahimi   return cached_engines;
84*ccdc9c3eSSadaf Ebrahimi }
85*ccdc9c3eSSadaf Ebrahimi 
// The result of running a match.
// RunSearch() clears the whole struct with memset before filling it in,
// so every field starts out false / empty for each search.
struct TestInstance::Result {
  bool skipped;         // test skipped: wasn't applicable
  bool matched;         // found a match
  bool untrusted;       // don't really trust the answer
  bool have_submatch;   // computed all submatch info
  bool have_submatch0;  // computed just submatch[0]
  StringPiece submatch[kMaxSubmatch];  // $0 first, then capture groups
};

// File-local shorthand for the nested result type.
typedef TestInstance::Result Result;
97*ccdc9c3eSSadaf Ebrahimi 
98*ccdc9c3eSSadaf Ebrahimi // Formats a single capture range s in text in the form (a,b)
99*ccdc9c3eSSadaf Ebrahimi // where a and b are the starting and ending offsets of s in text.
FormatCapture(const StringPiece & text,const StringPiece & s)100*ccdc9c3eSSadaf Ebrahimi static string FormatCapture(const StringPiece& text, const StringPiece& s) {
101*ccdc9c3eSSadaf Ebrahimi   if (s.begin() == NULL)
102*ccdc9c3eSSadaf Ebrahimi     return "(?,?)";
103*ccdc9c3eSSadaf Ebrahimi   return StringPrintf("(%td,%td)",
104*ccdc9c3eSSadaf Ebrahimi                       s.begin() - text.begin(), s.end() - text.begin());
105*ccdc9c3eSSadaf Ebrahimi }
106*ccdc9c3eSSadaf Ebrahimi 
107*ccdc9c3eSSadaf Ebrahimi // Returns whether text contains non-ASCII (>= 0x80) bytes.
NonASCII(const StringPiece & text)108*ccdc9c3eSSadaf Ebrahimi static bool NonASCII(const StringPiece& text) {
109*ccdc9c3eSSadaf Ebrahimi   for (size_t i = 0; i < text.size(); i++)
110*ccdc9c3eSSadaf Ebrahimi     if ((uint8_t)text[i] >= 0x80)
111*ccdc9c3eSSadaf Ebrahimi       return true;
112*ccdc9c3eSSadaf Ebrahimi   return false;
113*ccdc9c3eSSadaf Ebrahimi }
114*ccdc9c3eSSadaf Ebrahimi 
115*ccdc9c3eSSadaf Ebrahimi // Returns string representation of match kind.
FormatKind(Prog::MatchKind kind)116*ccdc9c3eSSadaf Ebrahimi static string FormatKind(Prog::MatchKind kind) {
117*ccdc9c3eSSadaf Ebrahimi   switch (kind) {
118*ccdc9c3eSSadaf Ebrahimi     case Prog::kFullMatch:
119*ccdc9c3eSSadaf Ebrahimi       return "full match";
120*ccdc9c3eSSadaf Ebrahimi     case Prog::kLongestMatch:
121*ccdc9c3eSSadaf Ebrahimi       return "longest match";
122*ccdc9c3eSSadaf Ebrahimi     case Prog::kFirstMatch:
123*ccdc9c3eSSadaf Ebrahimi       return "first match";
124*ccdc9c3eSSadaf Ebrahimi     case Prog::kManyMatch:
125*ccdc9c3eSSadaf Ebrahimi       return "many match";
126*ccdc9c3eSSadaf Ebrahimi   }
127*ccdc9c3eSSadaf Ebrahimi   return "???";
128*ccdc9c3eSSadaf Ebrahimi }
129*ccdc9c3eSSadaf Ebrahimi 
130*ccdc9c3eSSadaf Ebrahimi // Returns string representation of anchor kind.
FormatAnchor(Prog::Anchor anchor)131*ccdc9c3eSSadaf Ebrahimi static string FormatAnchor(Prog::Anchor anchor) {
132*ccdc9c3eSSadaf Ebrahimi   switch (anchor) {
133*ccdc9c3eSSadaf Ebrahimi     case Prog::kAnchored:
134*ccdc9c3eSSadaf Ebrahimi       return "anchored";
135*ccdc9c3eSSadaf Ebrahimi     case Prog::kUnanchored:
136*ccdc9c3eSSadaf Ebrahimi       return "unanchored";
137*ccdc9c3eSSadaf Ebrahimi   }
138*ccdc9c3eSSadaf Ebrahimi   return "???";
139*ccdc9c3eSSadaf Ebrahimi }
140*ccdc9c3eSSadaf Ebrahimi 
// Pairs a set of parse flags with the human-readable description that
// FormatMode() prints for it.
struct ParseMode {
  Regexp::ParseFlags parse_flags;  // exact flag combination
  string desc;                     // label shown in log output
};
145*ccdc9c3eSSadaf Ebrahimi 
146*ccdc9c3eSSadaf Ebrahimi static const Regexp::ParseFlags single_line =
147*ccdc9c3eSSadaf Ebrahimi   Regexp::LikePerl;
148*ccdc9c3eSSadaf Ebrahimi static const Regexp::ParseFlags multi_line =
149*ccdc9c3eSSadaf Ebrahimi   static_cast<Regexp::ParseFlags>(Regexp::LikePerl & ~Regexp::OneLine);
150*ccdc9c3eSSadaf Ebrahimi 
// Known parse-flag combinations and their descriptions.
// FormatMode() scans this table for an exact flag match.
static ParseMode parse_modes[] = {
  { single_line,                   "single-line"          },
  { single_line|Regexp::Latin1,    "single-line, latin1"  },
  { multi_line,                    "multiline"            },
  { multi_line|Regexp::NonGreedy,  "multiline, nongreedy" },
  { multi_line|Regexp::Latin1,     "multiline, latin1"    },
};
158*ccdc9c3eSSadaf Ebrahimi 
FormatMode(Regexp::ParseFlags flags)159*ccdc9c3eSSadaf Ebrahimi static string FormatMode(Regexp::ParseFlags flags) {
160*ccdc9c3eSSadaf Ebrahimi   for (int i = 0; i < arraysize(parse_modes); i++)
161*ccdc9c3eSSadaf Ebrahimi     if (parse_modes[i].parse_flags == flags)
162*ccdc9c3eSSadaf Ebrahimi       return parse_modes[i].desc;
163*ccdc9c3eSSadaf Ebrahimi   return StringPrintf("%#x", static_cast<uint32_t>(flags));
164*ccdc9c3eSSadaf Ebrahimi }
165*ccdc9c3eSSadaf Ebrahimi 
166*ccdc9c3eSSadaf Ebrahimi // Constructs and saves all the matching engines that
167*ccdc9c3eSSadaf Ebrahimi // will be required for the given tests.
TestInstance(const StringPiece & regexp_str,Prog::MatchKind kind,Regexp::ParseFlags flags)168*ccdc9c3eSSadaf Ebrahimi TestInstance::TestInstance(const StringPiece& regexp_str, Prog::MatchKind kind,
169*ccdc9c3eSSadaf Ebrahimi                            Regexp::ParseFlags flags)
170*ccdc9c3eSSadaf Ebrahimi   : regexp_str_(regexp_str),
171*ccdc9c3eSSadaf Ebrahimi     kind_(kind),
172*ccdc9c3eSSadaf Ebrahimi     flags_(flags),
173*ccdc9c3eSSadaf Ebrahimi     error_(false),
174*ccdc9c3eSSadaf Ebrahimi     regexp_(NULL),
175*ccdc9c3eSSadaf Ebrahimi     num_captures_(0),
176*ccdc9c3eSSadaf Ebrahimi     prog_(NULL),
177*ccdc9c3eSSadaf Ebrahimi     rprog_(NULL),
178*ccdc9c3eSSadaf Ebrahimi     re_(NULL),
179*ccdc9c3eSSadaf Ebrahimi     re2_(NULL) {
180*ccdc9c3eSSadaf Ebrahimi 
181*ccdc9c3eSSadaf Ebrahimi   VLOG(1) << CEscape(regexp_str);
182*ccdc9c3eSSadaf Ebrahimi 
183*ccdc9c3eSSadaf Ebrahimi   // Compile regexp to prog.
184*ccdc9c3eSSadaf Ebrahimi   // Always required - needed for backtracking (reference implementation).
185*ccdc9c3eSSadaf Ebrahimi   RegexpStatus status;
186*ccdc9c3eSSadaf Ebrahimi   regexp_ = Regexp::Parse(regexp_str, flags, &status);
187*ccdc9c3eSSadaf Ebrahimi   if (regexp_ == NULL) {
188*ccdc9c3eSSadaf Ebrahimi     LOG(INFO) << "Cannot parse: " << CEscape(regexp_str_)
189*ccdc9c3eSSadaf Ebrahimi               << " mode: " << FormatMode(flags);
190*ccdc9c3eSSadaf Ebrahimi     error_ = true;
191*ccdc9c3eSSadaf Ebrahimi     return;
192*ccdc9c3eSSadaf Ebrahimi   }
193*ccdc9c3eSSadaf Ebrahimi   num_captures_ = regexp_->NumCaptures();
194*ccdc9c3eSSadaf Ebrahimi   prog_ = regexp_->CompileToProg(0);
195*ccdc9c3eSSadaf Ebrahimi   if (prog_ == NULL) {
196*ccdc9c3eSSadaf Ebrahimi     LOG(INFO) << "Cannot compile: " << CEscape(regexp_str_);
197*ccdc9c3eSSadaf Ebrahimi     error_ = true;
198*ccdc9c3eSSadaf Ebrahimi     return;
199*ccdc9c3eSSadaf Ebrahimi   }
200*ccdc9c3eSSadaf Ebrahimi   if (FLAGS_dump_prog) {
201*ccdc9c3eSSadaf Ebrahimi     LOG(INFO) << "Prog for "
202*ccdc9c3eSSadaf Ebrahimi               << " regexp "
203*ccdc9c3eSSadaf Ebrahimi               << CEscape(regexp_str_)
204*ccdc9c3eSSadaf Ebrahimi               << " (" << FormatKind(kind_)
205*ccdc9c3eSSadaf Ebrahimi               << ", " << FormatMode(flags_)
206*ccdc9c3eSSadaf Ebrahimi               << ")\n"
207*ccdc9c3eSSadaf Ebrahimi               << prog_->Dump();
208*ccdc9c3eSSadaf Ebrahimi   }
209*ccdc9c3eSSadaf Ebrahimi 
210*ccdc9c3eSSadaf Ebrahimi   // Compile regexp to reversed prog.  Only needed for DFA engines.
211*ccdc9c3eSSadaf Ebrahimi   if (Engines() & ((1<<kEngineDFA)|(1<<kEngineDFA1))) {
212*ccdc9c3eSSadaf Ebrahimi     rprog_ = regexp_->CompileToReverseProg(0);
213*ccdc9c3eSSadaf Ebrahimi     if (rprog_ == NULL) {
214*ccdc9c3eSSadaf Ebrahimi       LOG(INFO) << "Cannot reverse compile: " << CEscape(regexp_str_);
215*ccdc9c3eSSadaf Ebrahimi       error_ = true;
216*ccdc9c3eSSadaf Ebrahimi       return;
217*ccdc9c3eSSadaf Ebrahimi     }
218*ccdc9c3eSSadaf Ebrahimi     if (FLAGS_dump_rprog)
219*ccdc9c3eSSadaf Ebrahimi       LOG(INFO) << rprog_->Dump();
220*ccdc9c3eSSadaf Ebrahimi   }
221*ccdc9c3eSSadaf Ebrahimi 
222*ccdc9c3eSSadaf Ebrahimi   // Create re string that will be used for RE and RE2.
223*ccdc9c3eSSadaf Ebrahimi   string re = string(regexp_str);
224*ccdc9c3eSSadaf Ebrahimi   // Accomodate flags.
225*ccdc9c3eSSadaf Ebrahimi   // Regexp::Latin1 will be accomodated below.
226*ccdc9c3eSSadaf Ebrahimi   if (!(flags & Regexp::OneLine))
227*ccdc9c3eSSadaf Ebrahimi     re = "(?m)" + re;
228*ccdc9c3eSSadaf Ebrahimi   if (flags & Regexp::NonGreedy)
229*ccdc9c3eSSadaf Ebrahimi     re = "(?U)" + re;
230*ccdc9c3eSSadaf Ebrahimi   if (flags & Regexp::DotNL)
231*ccdc9c3eSSadaf Ebrahimi     re = "(?s)" + re;
232*ccdc9c3eSSadaf Ebrahimi 
233*ccdc9c3eSSadaf Ebrahimi   // Compile regexp to RE2.
234*ccdc9c3eSSadaf Ebrahimi   if (Engines() & ((1<<kEngineRE2)|(1<<kEngineRE2a)|(1<<kEngineRE2b))) {
235*ccdc9c3eSSadaf Ebrahimi     RE2::Options options;
236*ccdc9c3eSSadaf Ebrahimi     if (flags & Regexp::Latin1)
237*ccdc9c3eSSadaf Ebrahimi       options.set_encoding(RE2::Options::EncodingLatin1);
238*ccdc9c3eSSadaf Ebrahimi     if (kind_ == Prog::kLongestMatch)
239*ccdc9c3eSSadaf Ebrahimi       options.set_longest_match(true);
240*ccdc9c3eSSadaf Ebrahimi     re2_ = new RE2(re, options);
241*ccdc9c3eSSadaf Ebrahimi     if (!re2_->error().empty()) {
242*ccdc9c3eSSadaf Ebrahimi       LOG(INFO) << "Cannot RE2: " << CEscape(re);
243*ccdc9c3eSSadaf Ebrahimi       error_ = true;
244*ccdc9c3eSSadaf Ebrahimi       return;
245*ccdc9c3eSSadaf Ebrahimi     }
246*ccdc9c3eSSadaf Ebrahimi   }
247*ccdc9c3eSSadaf Ebrahimi 
248*ccdc9c3eSSadaf Ebrahimi   // Compile regexp to RE.
249*ccdc9c3eSSadaf Ebrahimi   // PCRE as exposed by the RE interface isn't always usable.
250*ccdc9c3eSSadaf Ebrahimi   // 1. It disagrees about handling of empty-string reptitions
251*ccdc9c3eSSadaf Ebrahimi   //    like matching (a*)* against "b".  PCRE treats the (a*) as
252*ccdc9c3eSSadaf Ebrahimi   //    occurring once, while we treat it as occurring not at all.
253*ccdc9c3eSSadaf Ebrahimi   // 2. It treats $ as this weird thing meaning end of string
254*ccdc9c3eSSadaf Ebrahimi   //    or before the \n at the end of the string.
255*ccdc9c3eSSadaf Ebrahimi   // 3. It doesn't implement POSIX leftmost-longest matching.
256*ccdc9c3eSSadaf Ebrahimi   // 4. It lets \s match vertical tab.
257*ccdc9c3eSSadaf Ebrahimi   // MimicsPCRE() detects 1 and 2.
258*ccdc9c3eSSadaf Ebrahimi   if ((Engines() & (1<<kEnginePCRE)) && regexp_->MimicsPCRE() &&
259*ccdc9c3eSSadaf Ebrahimi       kind_ != Prog::kLongestMatch) {
260*ccdc9c3eSSadaf Ebrahimi     PCRE_Options o;
261*ccdc9c3eSSadaf Ebrahimi     o.set_option(PCRE::UTF8);
262*ccdc9c3eSSadaf Ebrahimi     if (flags & Regexp::Latin1)
263*ccdc9c3eSSadaf Ebrahimi       o.set_option(PCRE::None);
264*ccdc9c3eSSadaf Ebrahimi     // PCRE has interface bug keeping us from finding $0, so
265*ccdc9c3eSSadaf Ebrahimi     // add one more layer of parens.
266*ccdc9c3eSSadaf Ebrahimi     re_ = new PCRE("("+re+")", o);
267*ccdc9c3eSSadaf Ebrahimi     if (!re_->error().empty()) {
268*ccdc9c3eSSadaf Ebrahimi       LOG(INFO) << "Cannot PCRE: " << CEscape(re);
269*ccdc9c3eSSadaf Ebrahimi       error_ = true;
270*ccdc9c3eSSadaf Ebrahimi       return;
271*ccdc9c3eSSadaf Ebrahimi     }
272*ccdc9c3eSSadaf Ebrahimi   }
273*ccdc9c3eSSadaf Ebrahimi }
274*ccdc9c3eSSadaf Ebrahimi 
~TestInstance()275*ccdc9c3eSSadaf Ebrahimi TestInstance::~TestInstance() {
276*ccdc9c3eSSadaf Ebrahimi   if (regexp_)
277*ccdc9c3eSSadaf Ebrahimi     regexp_->Decref();
278*ccdc9c3eSSadaf Ebrahimi   delete prog_;
279*ccdc9c3eSSadaf Ebrahimi   delete rprog_;
280*ccdc9c3eSSadaf Ebrahimi   delete re_;
281*ccdc9c3eSSadaf Ebrahimi   delete re2_;
282*ccdc9c3eSSadaf Ebrahimi }
283*ccdc9c3eSSadaf Ebrahimi 
284*ccdc9c3eSSadaf Ebrahimi // Runs a single search using the named engine type.
285*ccdc9c3eSSadaf Ebrahimi // This interface hides all the irregularities of the various
286*ccdc9c3eSSadaf Ebrahimi // engine interfaces from the rest of this file.
RunSearch(Engine type,const StringPiece & orig_text,const StringPiece & orig_context,Prog::Anchor anchor,Result * result)287*ccdc9c3eSSadaf Ebrahimi void TestInstance::RunSearch(Engine type,
288*ccdc9c3eSSadaf Ebrahimi                              const StringPiece& orig_text,
289*ccdc9c3eSSadaf Ebrahimi                              const StringPiece& orig_context,
290*ccdc9c3eSSadaf Ebrahimi                              Prog::Anchor anchor,
291*ccdc9c3eSSadaf Ebrahimi                              Result* result) {
292*ccdc9c3eSSadaf Ebrahimi   // Result is not trivial, so we cannot freely clear it with memset(3),
293*ccdc9c3eSSadaf Ebrahimi   // but zeroing objects like so is safe and expedient for our purposes.
294*ccdc9c3eSSadaf Ebrahimi   memset(reinterpret_cast<void*>(result), 0, sizeof *result);
295*ccdc9c3eSSadaf Ebrahimi   if (regexp_ == NULL) {
296*ccdc9c3eSSadaf Ebrahimi     result->skipped = true;
297*ccdc9c3eSSadaf Ebrahimi     return;
298*ccdc9c3eSSadaf Ebrahimi   }
299*ccdc9c3eSSadaf Ebrahimi   int nsubmatch = 1 + num_captures_;  // NumCaptures doesn't count $0
300*ccdc9c3eSSadaf Ebrahimi   if (nsubmatch > kMaxSubmatch)
301*ccdc9c3eSSadaf Ebrahimi     nsubmatch = kMaxSubmatch;
302*ccdc9c3eSSadaf Ebrahimi 
303*ccdc9c3eSSadaf Ebrahimi   StringPiece text = orig_text;
304*ccdc9c3eSSadaf Ebrahimi   StringPiece context = orig_context;
305*ccdc9c3eSSadaf Ebrahimi 
306*ccdc9c3eSSadaf Ebrahimi   switch (type) {
307*ccdc9c3eSSadaf Ebrahimi     default:
308*ccdc9c3eSSadaf Ebrahimi       LOG(FATAL) << "Bad RunSearch type: " << (int)type;
309*ccdc9c3eSSadaf Ebrahimi 
310*ccdc9c3eSSadaf Ebrahimi     case kEngineBacktrack:
311*ccdc9c3eSSadaf Ebrahimi       if (prog_ == NULL) {
312*ccdc9c3eSSadaf Ebrahimi         result->skipped = true;
313*ccdc9c3eSSadaf Ebrahimi         break;
314*ccdc9c3eSSadaf Ebrahimi       }
315*ccdc9c3eSSadaf Ebrahimi       result->matched =
316*ccdc9c3eSSadaf Ebrahimi         prog_->UnsafeSearchBacktrack(text, context, anchor, kind_,
317*ccdc9c3eSSadaf Ebrahimi                                      result->submatch, nsubmatch);
318*ccdc9c3eSSadaf Ebrahimi       result->have_submatch = true;
319*ccdc9c3eSSadaf Ebrahimi       break;
320*ccdc9c3eSSadaf Ebrahimi 
321*ccdc9c3eSSadaf Ebrahimi     case kEngineNFA:
322*ccdc9c3eSSadaf Ebrahimi       if (prog_ == NULL) {
323*ccdc9c3eSSadaf Ebrahimi         result->skipped = true;
324*ccdc9c3eSSadaf Ebrahimi         break;
325*ccdc9c3eSSadaf Ebrahimi       }
326*ccdc9c3eSSadaf Ebrahimi       result->matched =
327*ccdc9c3eSSadaf Ebrahimi         prog_->SearchNFA(text, context, anchor, kind_,
328*ccdc9c3eSSadaf Ebrahimi                         result->submatch, nsubmatch);
329*ccdc9c3eSSadaf Ebrahimi       result->have_submatch = true;
330*ccdc9c3eSSadaf Ebrahimi       break;
331*ccdc9c3eSSadaf Ebrahimi 
332*ccdc9c3eSSadaf Ebrahimi     case kEngineDFA:
333*ccdc9c3eSSadaf Ebrahimi       if (prog_ == NULL) {
334*ccdc9c3eSSadaf Ebrahimi         result->skipped = true;
335*ccdc9c3eSSadaf Ebrahimi         break;
336*ccdc9c3eSSadaf Ebrahimi       }
337*ccdc9c3eSSadaf Ebrahimi       result->matched = prog_->SearchDFA(text, context, anchor, kind_, NULL,
338*ccdc9c3eSSadaf Ebrahimi                                          &result->skipped, NULL);
339*ccdc9c3eSSadaf Ebrahimi       break;
340*ccdc9c3eSSadaf Ebrahimi 
341*ccdc9c3eSSadaf Ebrahimi     case kEngineDFA1:
342*ccdc9c3eSSadaf Ebrahimi       if (prog_ == NULL || rprog_ == NULL) {
343*ccdc9c3eSSadaf Ebrahimi         result->skipped = true;
344*ccdc9c3eSSadaf Ebrahimi         break;
345*ccdc9c3eSSadaf Ebrahimi       }
346*ccdc9c3eSSadaf Ebrahimi       result->matched =
347*ccdc9c3eSSadaf Ebrahimi         prog_->SearchDFA(text, context, anchor, kind_, result->submatch,
348*ccdc9c3eSSadaf Ebrahimi                          &result->skipped, NULL);
349*ccdc9c3eSSadaf Ebrahimi       // If anchored, no need for second run,
350*ccdc9c3eSSadaf Ebrahimi       // but do it anyway to find more bugs.
351*ccdc9c3eSSadaf Ebrahimi       if (result->matched) {
352*ccdc9c3eSSadaf Ebrahimi         if (!rprog_->SearchDFA(result->submatch[0], context,
353*ccdc9c3eSSadaf Ebrahimi                                Prog::kAnchored, Prog::kLongestMatch,
354*ccdc9c3eSSadaf Ebrahimi                                result->submatch,
355*ccdc9c3eSSadaf Ebrahimi                                &result->skipped, NULL)) {
356*ccdc9c3eSSadaf Ebrahimi           LOG(ERROR) << "Reverse DFA inconsistency: "
357*ccdc9c3eSSadaf Ebrahimi                      << CEscape(regexp_str_)
358*ccdc9c3eSSadaf Ebrahimi                      << " on " << CEscape(text);
359*ccdc9c3eSSadaf Ebrahimi           result->matched = false;
360*ccdc9c3eSSadaf Ebrahimi         }
361*ccdc9c3eSSadaf Ebrahimi       }
362*ccdc9c3eSSadaf Ebrahimi       result->have_submatch0 = true;
363*ccdc9c3eSSadaf Ebrahimi       break;
364*ccdc9c3eSSadaf Ebrahimi 
365*ccdc9c3eSSadaf Ebrahimi     case kEngineOnePass:
366*ccdc9c3eSSadaf Ebrahimi       if (prog_ == NULL ||
367*ccdc9c3eSSadaf Ebrahimi           anchor == Prog::kUnanchored ||
368*ccdc9c3eSSadaf Ebrahimi           !prog_->IsOnePass() ||
369*ccdc9c3eSSadaf Ebrahimi           nsubmatch > Prog::kMaxOnePassCapture) {
370*ccdc9c3eSSadaf Ebrahimi         result->skipped = true;
371*ccdc9c3eSSadaf Ebrahimi         break;
372*ccdc9c3eSSadaf Ebrahimi       }
373*ccdc9c3eSSadaf Ebrahimi       result->matched = prog_->SearchOnePass(text, context, anchor, kind_,
374*ccdc9c3eSSadaf Ebrahimi                                       result->submatch, nsubmatch);
375*ccdc9c3eSSadaf Ebrahimi       result->have_submatch = true;
376*ccdc9c3eSSadaf Ebrahimi       break;
377*ccdc9c3eSSadaf Ebrahimi 
378*ccdc9c3eSSadaf Ebrahimi     case kEngineBitState:
379*ccdc9c3eSSadaf Ebrahimi       if (prog_ == NULL) {
380*ccdc9c3eSSadaf Ebrahimi         result->skipped = true;
381*ccdc9c3eSSadaf Ebrahimi         break;
382*ccdc9c3eSSadaf Ebrahimi       }
383*ccdc9c3eSSadaf Ebrahimi       result->matched = prog_->SearchBitState(text, context, anchor, kind_,
384*ccdc9c3eSSadaf Ebrahimi                                               result->submatch, nsubmatch);
385*ccdc9c3eSSadaf Ebrahimi       result->have_submatch = true;
386*ccdc9c3eSSadaf Ebrahimi       break;
387*ccdc9c3eSSadaf Ebrahimi 
388*ccdc9c3eSSadaf Ebrahimi     case kEngineRE2:
389*ccdc9c3eSSadaf Ebrahimi     case kEngineRE2a:
390*ccdc9c3eSSadaf Ebrahimi     case kEngineRE2b: {
391*ccdc9c3eSSadaf Ebrahimi       if (!re2_ || text.end() != context.end()) {
392*ccdc9c3eSSadaf Ebrahimi         result->skipped = true;
393*ccdc9c3eSSadaf Ebrahimi         break;
394*ccdc9c3eSSadaf Ebrahimi       }
395*ccdc9c3eSSadaf Ebrahimi 
396*ccdc9c3eSSadaf Ebrahimi       RE2::Anchor re_anchor;
397*ccdc9c3eSSadaf Ebrahimi       if (anchor == Prog::kAnchored)
398*ccdc9c3eSSadaf Ebrahimi         re_anchor = RE2::ANCHOR_START;
399*ccdc9c3eSSadaf Ebrahimi       else
400*ccdc9c3eSSadaf Ebrahimi         re_anchor = RE2::UNANCHORED;
401*ccdc9c3eSSadaf Ebrahimi       if (kind_ == Prog::kFullMatch)
402*ccdc9c3eSSadaf Ebrahimi         re_anchor = RE2::ANCHOR_BOTH;
403*ccdc9c3eSSadaf Ebrahimi 
404*ccdc9c3eSSadaf Ebrahimi       result->matched = re2_->Match(
405*ccdc9c3eSSadaf Ebrahimi           context,
406*ccdc9c3eSSadaf Ebrahimi           static_cast<size_t>(text.begin() - context.begin()),
407*ccdc9c3eSSadaf Ebrahimi           static_cast<size_t>(text.end() - context.begin()),
408*ccdc9c3eSSadaf Ebrahimi           re_anchor,
409*ccdc9c3eSSadaf Ebrahimi           result->submatch,
410*ccdc9c3eSSadaf Ebrahimi           nsubmatch);
411*ccdc9c3eSSadaf Ebrahimi       result->have_submatch = nsubmatch > 0;
412*ccdc9c3eSSadaf Ebrahimi       break;
413*ccdc9c3eSSadaf Ebrahimi     }
414*ccdc9c3eSSadaf Ebrahimi 
415*ccdc9c3eSSadaf Ebrahimi     case kEnginePCRE: {
416*ccdc9c3eSSadaf Ebrahimi       if (!re_ || text.begin() != context.begin() ||
417*ccdc9c3eSSadaf Ebrahimi           text.end() != context.end()) {
418*ccdc9c3eSSadaf Ebrahimi         result->skipped = true;
419*ccdc9c3eSSadaf Ebrahimi         break;
420*ccdc9c3eSSadaf Ebrahimi       }
421*ccdc9c3eSSadaf Ebrahimi 
422*ccdc9c3eSSadaf Ebrahimi       // In Perl/PCRE, \v matches any character considered vertical
423*ccdc9c3eSSadaf Ebrahimi       // whitespace, not just vertical tab. Regexp::MimicsPCRE() is
424*ccdc9c3eSSadaf Ebrahimi       // unable to handle all cases of this, unfortunately, so just
425*ccdc9c3eSSadaf Ebrahimi       // catch them here. :(
426*ccdc9c3eSSadaf Ebrahimi       if (regexp_str_.find("\\v") != StringPiece::npos &&
427*ccdc9c3eSSadaf Ebrahimi           (text.find('\n') != StringPiece::npos ||
428*ccdc9c3eSSadaf Ebrahimi            text.find('\f') != StringPiece::npos ||
429*ccdc9c3eSSadaf Ebrahimi            text.find('\r') != StringPiece::npos)) {
430*ccdc9c3eSSadaf Ebrahimi         result->skipped = true;
431*ccdc9c3eSSadaf Ebrahimi         break;
432*ccdc9c3eSSadaf Ebrahimi       }
433*ccdc9c3eSSadaf Ebrahimi 
434*ccdc9c3eSSadaf Ebrahimi       // PCRE 8.34 or so started allowing vertical tab to match \s,
435*ccdc9c3eSSadaf Ebrahimi       // following a change made in Perl 5.18. RE2 does not.
436*ccdc9c3eSSadaf Ebrahimi       if ((regexp_str_.find("\\s") != StringPiece::npos ||
437*ccdc9c3eSSadaf Ebrahimi            regexp_str_.find("\\S") != StringPiece::npos) &&
438*ccdc9c3eSSadaf Ebrahimi           text.find('\v') != StringPiece::npos) {
439*ccdc9c3eSSadaf Ebrahimi         result->skipped = true;
440*ccdc9c3eSSadaf Ebrahimi         break;
441*ccdc9c3eSSadaf Ebrahimi       }
442*ccdc9c3eSSadaf Ebrahimi 
443*ccdc9c3eSSadaf Ebrahimi       const PCRE::Arg **argptr = new const PCRE::Arg*[nsubmatch];
444*ccdc9c3eSSadaf Ebrahimi       PCRE::Arg *a = new PCRE::Arg[nsubmatch];
445*ccdc9c3eSSadaf Ebrahimi       for (int i = 0; i < nsubmatch; i++) {
446*ccdc9c3eSSadaf Ebrahimi         a[i] = PCRE::Arg(&result->submatch[i]);
447*ccdc9c3eSSadaf Ebrahimi         argptr[i] = &a[i];
448*ccdc9c3eSSadaf Ebrahimi       }
449*ccdc9c3eSSadaf Ebrahimi       size_t consumed;
450*ccdc9c3eSSadaf Ebrahimi       PCRE::Anchor pcre_anchor;
451*ccdc9c3eSSadaf Ebrahimi       if (anchor == Prog::kAnchored)
452*ccdc9c3eSSadaf Ebrahimi         pcre_anchor = PCRE::ANCHOR_START;
453*ccdc9c3eSSadaf Ebrahimi       else
454*ccdc9c3eSSadaf Ebrahimi         pcre_anchor = PCRE::UNANCHORED;
455*ccdc9c3eSSadaf Ebrahimi       if (kind_ == Prog::kFullMatch)
456*ccdc9c3eSSadaf Ebrahimi         pcre_anchor = PCRE::ANCHOR_BOTH;
457*ccdc9c3eSSadaf Ebrahimi       re_->ClearHitLimit();
458*ccdc9c3eSSadaf Ebrahimi       result->matched =
459*ccdc9c3eSSadaf Ebrahimi         re_->DoMatch(text,
460*ccdc9c3eSSadaf Ebrahimi                      pcre_anchor,
461*ccdc9c3eSSadaf Ebrahimi                      &consumed,
462*ccdc9c3eSSadaf Ebrahimi                      argptr, nsubmatch);
463*ccdc9c3eSSadaf Ebrahimi       if (re_->HitLimit()) {
464*ccdc9c3eSSadaf Ebrahimi         result->untrusted = true;
465*ccdc9c3eSSadaf Ebrahimi         delete[] argptr;
466*ccdc9c3eSSadaf Ebrahimi         delete[] a;
467*ccdc9c3eSSadaf Ebrahimi         break;
468*ccdc9c3eSSadaf Ebrahimi       }
469*ccdc9c3eSSadaf Ebrahimi       result->have_submatch = true;
470*ccdc9c3eSSadaf Ebrahimi       delete[] argptr;
471*ccdc9c3eSSadaf Ebrahimi       delete[] a;
472*ccdc9c3eSSadaf Ebrahimi       break;
473*ccdc9c3eSSadaf Ebrahimi     }
474*ccdc9c3eSSadaf Ebrahimi   }
475*ccdc9c3eSSadaf Ebrahimi 
476*ccdc9c3eSSadaf Ebrahimi   if (!result->matched)
477*ccdc9c3eSSadaf Ebrahimi     memset(result->submatch, 0, sizeof result->submatch);
478*ccdc9c3eSSadaf Ebrahimi }
479*ccdc9c3eSSadaf Ebrahimi 
480*ccdc9c3eSSadaf Ebrahimi // Checks whether r is okay given that correct is the right answer.
481*ccdc9c3eSSadaf Ebrahimi // Specifically, r's answers have to match (but it doesn't have to
482*ccdc9c3eSSadaf Ebrahimi // claim to have all the answers).
ResultOkay(const Result & r,const Result & correct)483*ccdc9c3eSSadaf Ebrahimi static bool ResultOkay(const Result& r, const Result& correct) {
484*ccdc9c3eSSadaf Ebrahimi   if (r.skipped)
485*ccdc9c3eSSadaf Ebrahimi     return true;
486*ccdc9c3eSSadaf Ebrahimi   if (r.matched != correct.matched)
487*ccdc9c3eSSadaf Ebrahimi     return false;
488*ccdc9c3eSSadaf Ebrahimi   if (r.have_submatch || r.have_submatch0) {
489*ccdc9c3eSSadaf Ebrahimi     for (int i = 0; i < kMaxSubmatch; i++) {
490*ccdc9c3eSSadaf Ebrahimi       if (correct.submatch[i].begin() != r.submatch[i].begin() ||
491*ccdc9c3eSSadaf Ebrahimi           correct.submatch[i].size() != r.submatch[i].size())
492*ccdc9c3eSSadaf Ebrahimi         return false;
493*ccdc9c3eSSadaf Ebrahimi       if (!r.have_submatch)
494*ccdc9c3eSSadaf Ebrahimi         break;
495*ccdc9c3eSSadaf Ebrahimi     }
496*ccdc9c3eSSadaf Ebrahimi   }
497*ccdc9c3eSSadaf Ebrahimi   return true;
498*ccdc9c3eSSadaf Ebrahimi }
499*ccdc9c3eSSadaf Ebrahimi 
500*ccdc9c3eSSadaf Ebrahimi // Runs a single test.
RunCase(const StringPiece & text,const StringPiece & context,Prog::Anchor anchor)501*ccdc9c3eSSadaf Ebrahimi bool TestInstance::RunCase(const StringPiece& text, const StringPiece& context,
502*ccdc9c3eSSadaf Ebrahimi                            Prog::Anchor anchor) {
503*ccdc9c3eSSadaf Ebrahimi   // Backtracking is the gold standard.
504*ccdc9c3eSSadaf Ebrahimi   Result correct;
505*ccdc9c3eSSadaf Ebrahimi   RunSearch(kEngineBacktrack, text, context, anchor, &correct);
506*ccdc9c3eSSadaf Ebrahimi   if (correct.skipped) {
507*ccdc9c3eSSadaf Ebrahimi     if (regexp_ == NULL)
508*ccdc9c3eSSadaf Ebrahimi       return true;
509*ccdc9c3eSSadaf Ebrahimi     LOG(ERROR) << "Skipped backtracking! " << CEscape(regexp_str_)
510*ccdc9c3eSSadaf Ebrahimi                << " " << FormatMode(flags_);
511*ccdc9c3eSSadaf Ebrahimi     return false;
512*ccdc9c3eSSadaf Ebrahimi   }
513*ccdc9c3eSSadaf Ebrahimi   VLOG(1) << "Try: regexp " << CEscape(regexp_str_)
514*ccdc9c3eSSadaf Ebrahimi           << " text " << CEscape(text)
515*ccdc9c3eSSadaf Ebrahimi           << " (" << FormatKind(kind_)
516*ccdc9c3eSSadaf Ebrahimi           << ", " << FormatAnchor(anchor)
517*ccdc9c3eSSadaf Ebrahimi           << ", " << FormatMode(flags_)
518*ccdc9c3eSSadaf Ebrahimi           << ")";
519*ccdc9c3eSSadaf Ebrahimi 
520*ccdc9c3eSSadaf Ebrahimi   // Compare the others.
521*ccdc9c3eSSadaf Ebrahimi   bool all_okay = true;
522*ccdc9c3eSSadaf Ebrahimi   for (Engine i = kEngineBacktrack+1; i < kEngineMax; i++) {
523*ccdc9c3eSSadaf Ebrahimi     if (!(Engines() & (1<<i)))
524*ccdc9c3eSSadaf Ebrahimi       continue;
525*ccdc9c3eSSadaf Ebrahimi 
526*ccdc9c3eSSadaf Ebrahimi     Result r;
527*ccdc9c3eSSadaf Ebrahimi     RunSearch(i, text, context, anchor, &r);
528*ccdc9c3eSSadaf Ebrahimi     if (ResultOkay(r, correct)) {
529*ccdc9c3eSSadaf Ebrahimi       if (FLAGS_log_okay)
530*ccdc9c3eSSadaf Ebrahimi         LogMatch(r.skipped ? "Skipped: " : "Okay: ", i, text, context, anchor);
531*ccdc9c3eSSadaf Ebrahimi       continue;
532*ccdc9c3eSSadaf Ebrahimi     }
533*ccdc9c3eSSadaf Ebrahimi 
534*ccdc9c3eSSadaf Ebrahimi     // We disagree with PCRE on the meaning of some Unicode matches.
535*ccdc9c3eSSadaf Ebrahimi     // In particular, we treat non-ASCII UTF-8 as non-word characters.
536*ccdc9c3eSSadaf Ebrahimi     // We also treat "empty" character sets like [^\w\W] as being
537*ccdc9c3eSSadaf Ebrahimi     // impossible to match, while PCRE apparently excludes some code
538*ccdc9c3eSSadaf Ebrahimi     // points (e.g., 0x0080) from both \w and \W.
539*ccdc9c3eSSadaf Ebrahimi     if (i == kEnginePCRE && NonASCII(text))
540*ccdc9c3eSSadaf Ebrahimi       continue;
541*ccdc9c3eSSadaf Ebrahimi 
542*ccdc9c3eSSadaf Ebrahimi     if (!r.untrusted)
543*ccdc9c3eSSadaf Ebrahimi       all_okay = false;
544*ccdc9c3eSSadaf Ebrahimi 
545*ccdc9c3eSSadaf Ebrahimi     LogMatch(r.untrusted ? "(Untrusted) Mismatch: " : "Mismatch: ", i, text,
546*ccdc9c3eSSadaf Ebrahimi              context, anchor);
547*ccdc9c3eSSadaf Ebrahimi     if (r.matched != correct.matched) {
548*ccdc9c3eSSadaf Ebrahimi       if (r.matched) {
549*ccdc9c3eSSadaf Ebrahimi         LOG(INFO) << "   Should not match (but does).";
550*ccdc9c3eSSadaf Ebrahimi       } else {
551*ccdc9c3eSSadaf Ebrahimi         LOG(INFO) << "   Should match (but does not).";
552*ccdc9c3eSSadaf Ebrahimi         continue;
553*ccdc9c3eSSadaf Ebrahimi       }
554*ccdc9c3eSSadaf Ebrahimi     }
555*ccdc9c3eSSadaf Ebrahimi     for (int i = 0; i < 1+num_captures_; i++) {
556*ccdc9c3eSSadaf Ebrahimi       if (r.submatch[i].begin() != correct.submatch[i].begin() ||
557*ccdc9c3eSSadaf Ebrahimi           r.submatch[i].end() != correct.submatch[i].end()) {
558*ccdc9c3eSSadaf Ebrahimi         LOG(INFO) <<
559*ccdc9c3eSSadaf Ebrahimi           StringPrintf("   $%d: should be %s is %s",
560*ccdc9c3eSSadaf Ebrahimi                        i,
561*ccdc9c3eSSadaf Ebrahimi                        FormatCapture(text, correct.submatch[i]).c_str(),
562*ccdc9c3eSSadaf Ebrahimi                        FormatCapture(text, r.submatch[i]).c_str());
563*ccdc9c3eSSadaf Ebrahimi       } else {
564*ccdc9c3eSSadaf Ebrahimi         LOG(INFO) <<
565*ccdc9c3eSSadaf Ebrahimi           StringPrintf("   $%d: %s ok", i,
566*ccdc9c3eSSadaf Ebrahimi                        FormatCapture(text, r.submatch[i]).c_str());
567*ccdc9c3eSSadaf Ebrahimi       }
568*ccdc9c3eSSadaf Ebrahimi     }
569*ccdc9c3eSSadaf Ebrahimi   }
570*ccdc9c3eSSadaf Ebrahimi 
571*ccdc9c3eSSadaf Ebrahimi   if (!all_okay) {
572*ccdc9c3eSSadaf Ebrahimi     if (FLAGS_max_regexp_failures > 0 && --FLAGS_max_regexp_failures == 0)
573*ccdc9c3eSSadaf Ebrahimi       LOG(QFATAL) << "Too many regexp failures.";
574*ccdc9c3eSSadaf Ebrahimi   }
575*ccdc9c3eSSadaf Ebrahimi 
576*ccdc9c3eSSadaf Ebrahimi   return all_okay;
577*ccdc9c3eSSadaf Ebrahimi }
578*ccdc9c3eSSadaf Ebrahimi 
LogMatch(const char * prefix,Engine e,const StringPiece & text,const StringPiece & context,Prog::Anchor anchor)579*ccdc9c3eSSadaf Ebrahimi void TestInstance::LogMatch(const char* prefix, Engine e,
580*ccdc9c3eSSadaf Ebrahimi                             const StringPiece& text, const StringPiece& context,
581*ccdc9c3eSSadaf Ebrahimi                             Prog::Anchor anchor) {
582*ccdc9c3eSSadaf Ebrahimi   LOG(INFO) << prefix
583*ccdc9c3eSSadaf Ebrahimi     << EngineName(e)
584*ccdc9c3eSSadaf Ebrahimi     << " regexp "
585*ccdc9c3eSSadaf Ebrahimi     << CEscape(regexp_str_)
586*ccdc9c3eSSadaf Ebrahimi     << " "
587*ccdc9c3eSSadaf Ebrahimi     << CEscape(regexp_->ToString())
588*ccdc9c3eSSadaf Ebrahimi     << " text "
589*ccdc9c3eSSadaf Ebrahimi     << CEscape(text)
590*ccdc9c3eSSadaf Ebrahimi     << " ("
591*ccdc9c3eSSadaf Ebrahimi     << text.begin() - context.begin()
592*ccdc9c3eSSadaf Ebrahimi     << ","
593*ccdc9c3eSSadaf Ebrahimi     << text.end() - context.begin()
594*ccdc9c3eSSadaf Ebrahimi     << ") of context "
595*ccdc9c3eSSadaf Ebrahimi     << CEscape(context)
596*ccdc9c3eSSadaf Ebrahimi     << " (" << FormatKind(kind_)
597*ccdc9c3eSSadaf Ebrahimi     << ", " << FormatAnchor(anchor)
598*ccdc9c3eSSadaf Ebrahimi     << ", " << FormatMode(flags_)
599*ccdc9c3eSSadaf Ebrahimi     << ")";
600*ccdc9c3eSSadaf Ebrahimi }
601*ccdc9c3eSSadaf Ebrahimi 
602*ccdc9c3eSSadaf Ebrahimi static Prog::MatchKind kinds[] = {
603*ccdc9c3eSSadaf Ebrahimi   Prog::kFirstMatch,
604*ccdc9c3eSSadaf Ebrahimi   Prog::kLongestMatch,
605*ccdc9c3eSSadaf Ebrahimi   Prog::kFullMatch,
606*ccdc9c3eSSadaf Ebrahimi };
607*ccdc9c3eSSadaf Ebrahimi 
608*ccdc9c3eSSadaf Ebrahimi // Test all possible match kinds and parse modes.
Tester(const StringPiece & regexp)609*ccdc9c3eSSadaf Ebrahimi Tester::Tester(const StringPiece& regexp) {
610*ccdc9c3eSSadaf Ebrahimi   error_ = false;
611*ccdc9c3eSSadaf Ebrahimi   for (int i = 0; i < arraysize(kinds); i++) {
612*ccdc9c3eSSadaf Ebrahimi     for (int j = 0; j < arraysize(parse_modes); j++) {
613*ccdc9c3eSSadaf Ebrahimi       TestInstance* t = new TestInstance(regexp, kinds[i],
614*ccdc9c3eSSadaf Ebrahimi                                          parse_modes[j].parse_flags);
615*ccdc9c3eSSadaf Ebrahimi       error_ |= t->error();
616*ccdc9c3eSSadaf Ebrahimi       v_.push_back(t);
617*ccdc9c3eSSadaf Ebrahimi     }
618*ccdc9c3eSSadaf Ebrahimi   }
619*ccdc9c3eSSadaf Ebrahimi }
620*ccdc9c3eSSadaf Ebrahimi 
~Tester()621*ccdc9c3eSSadaf Ebrahimi Tester::~Tester() {
622*ccdc9c3eSSadaf Ebrahimi   for (size_t i = 0; i < v_.size(); i++)
623*ccdc9c3eSSadaf Ebrahimi     delete v_[i];
624*ccdc9c3eSSadaf Ebrahimi }
625*ccdc9c3eSSadaf Ebrahimi 
TestCase(const StringPiece & text,const StringPiece & context,Prog::Anchor anchor)626*ccdc9c3eSSadaf Ebrahimi bool Tester::TestCase(const StringPiece& text, const StringPiece& context,
627*ccdc9c3eSSadaf Ebrahimi                          Prog::Anchor anchor) {
628*ccdc9c3eSSadaf Ebrahimi   bool okay = true;
629*ccdc9c3eSSadaf Ebrahimi   for (size_t i = 0; i < v_.size(); i++)
630*ccdc9c3eSSadaf Ebrahimi     okay &= (!v_[i]->error() && v_[i]->RunCase(text, context, anchor));
631*ccdc9c3eSSadaf Ebrahimi   return okay;
632*ccdc9c3eSSadaf Ebrahimi }
633*ccdc9c3eSSadaf Ebrahimi 
634*ccdc9c3eSSadaf Ebrahimi static Prog::Anchor anchors[] = {
635*ccdc9c3eSSadaf Ebrahimi   Prog::kAnchored,
636*ccdc9c3eSSadaf Ebrahimi   Prog::kUnanchored
637*ccdc9c3eSSadaf Ebrahimi };
638*ccdc9c3eSSadaf Ebrahimi 
TestInput(const StringPiece & text)639*ccdc9c3eSSadaf Ebrahimi bool Tester::TestInput(const StringPiece& text) {
640*ccdc9c3eSSadaf Ebrahimi   bool okay = TestInputInContext(text, text);
641*ccdc9c3eSSadaf Ebrahimi   if (text.size() > 0) {
642*ccdc9c3eSSadaf Ebrahimi     StringPiece sp;
643*ccdc9c3eSSadaf Ebrahimi     sp = text;
644*ccdc9c3eSSadaf Ebrahimi     sp.remove_prefix(1);
645*ccdc9c3eSSadaf Ebrahimi     okay &= TestInputInContext(sp, text);
646*ccdc9c3eSSadaf Ebrahimi     sp = text;
647*ccdc9c3eSSadaf Ebrahimi     sp.remove_suffix(1);
648*ccdc9c3eSSadaf Ebrahimi     okay &= TestInputInContext(sp, text);
649*ccdc9c3eSSadaf Ebrahimi   }
650*ccdc9c3eSSadaf Ebrahimi   return okay;
651*ccdc9c3eSSadaf Ebrahimi }
652*ccdc9c3eSSadaf Ebrahimi 
TestInputInContext(const StringPiece & text,const StringPiece & context)653*ccdc9c3eSSadaf Ebrahimi bool Tester::TestInputInContext(const StringPiece& text,
654*ccdc9c3eSSadaf Ebrahimi                                 const StringPiece& context) {
655*ccdc9c3eSSadaf Ebrahimi   bool okay = true;
656*ccdc9c3eSSadaf Ebrahimi   for (int i = 0; i < arraysize(anchors); i++)
657*ccdc9c3eSSadaf Ebrahimi     okay &= TestCase(text, context, anchors[i]);
658*ccdc9c3eSSadaf Ebrahimi   return okay;
659*ccdc9c3eSSadaf Ebrahimi }
660*ccdc9c3eSSadaf Ebrahimi 
TestRegexpOnText(const StringPiece & regexp,const StringPiece & text)661*ccdc9c3eSSadaf Ebrahimi bool TestRegexpOnText(const StringPiece& regexp,
662*ccdc9c3eSSadaf Ebrahimi                       const StringPiece& text) {
663*ccdc9c3eSSadaf Ebrahimi   Tester t(regexp);
664*ccdc9c3eSSadaf Ebrahimi   return t.TestInput(text);
665*ccdc9c3eSSadaf Ebrahimi }
666*ccdc9c3eSSadaf Ebrahimi 
667*ccdc9c3eSSadaf Ebrahimi }  // namespace re2
668