xref: /aosp_15_r20/external/regex-re2/re2/testing/exhaustive_tester.cc (revision ccdc9c3e24c519bfa4832a66aa2e83a52c19f295)
1 // Copyright 2008 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Exhaustive testing of regular expression matching.
6 
7 // Each test picks an alphabet (e.g., "abc"), a maximum string length,
8 // a maximum regular expression length, and a maximum number of letters
9 // that can appear in the regular expression.  Given these parameters,
10 // it tries every possible regular expression and string, verifying that
11 // the NFA, DFA, and a trivial backtracking implementation agree about
12 // the location of the match.
13 
14 #include <stdio.h>
15 
16 #include "util/test.h"
17 #include "util/logging.h"
18 #include "util/strutil.h"
19 #include "re2/testing/exhaustive_tester.h"
20 #include "re2/testing/tester.h"
21 
22 // For target `log' in the Makefile.
23 #ifndef LOGGING
24 #define LOGGING 0
25 #endif
26 
27 DEFINE_bool(show_regexps, false, "show regexps during testing");
28 
29 DEFINE_int32(max_bad_regexp_inputs, 1,
30              "Stop testing a regular expression after finding this many "
31              "strings that break it.");
32 
33 namespace re2 {
34 
escape(const StringPiece & sp)35 static char* escape(const StringPiece& sp) {
36   static char buf[512];
37   char* p = buf;
38   *p++ = '\"';
39   for (size_t i = 0; i < sp.size(); i++) {
40     if(p+5 >= buf+sizeof buf)
41       LOG(FATAL) << "ExhaustiveTester escape: too long";
42     if(sp[i] == '\\' || sp[i] == '\"') {
43       *p++ = '\\';
44       *p++ = sp[i];
45     } else if(sp[i] == '\n') {
46       *p++ = '\\';
47       *p++ = 'n';
48     } else {
49       *p++ = sp[i];
50     }
51   }
52   *p++ = '\"';
53   *p = '\0';
54   return buf;
55 }
56 
PrintResult(const RE2 & re,const StringPiece & input,RE2::Anchor anchor,StringPiece * m,int n)57 static void PrintResult(const RE2& re, const StringPiece& input, RE2::Anchor anchor, StringPiece *m, int n) {
58   if (!re.Match(input, 0, input.size(), anchor, m, n)) {
59     printf("-");
60     return;
61   }
62   for (int i = 0; i < n; i++) {
63     if (i > 0)
64       printf(" ");
65     if (m[i].begin() == NULL)
66       printf("-");
67     else
68       printf("%td-%td",
69              m[i].begin() - input.begin(), m[i].end() - input.begin());
70   }
71 }
72 
73 // Processes a single generated regexp.
74 // Compiles it using Regexp interface and PCRE, and then
75 // checks that NFA, DFA, and PCRE all return the same results.
HandleRegexp(const string & const_regexp)76 void ExhaustiveTester::HandleRegexp(const string& const_regexp) {
77   regexps_++;
78   string regexp = const_regexp;
79   if (!topwrapper_.empty())
80     regexp = StringPrintf(topwrapper_.c_str(), regexp.c_str());
81 
82   if (FLAGS_show_regexps) {
83     printf("\r%s", regexp.c_str());
84     fflush(stdout);
85   }
86 
87   if (LOGGING) {
88     // Write out test cases and answers for use in testing
89     // other implementations, such as Go's regexp package.
90     if (randomstrings_)
91       LOG(ERROR) << "Cannot log with random strings.";
92     if (regexps_ == 1) {  // first
93       printf("strings\n");
94       strgen_.Reset();
95       while (strgen_.HasNext())
96         printf("%s\n", escape(strgen_.Next()));
97       printf("regexps\n");
98     }
99     printf("%s\n", escape(regexp));
100 
101     RE2 re(regexp);
102     RE2::Options longest;
103     longest.set_longest_match(true);
104     RE2 relongest(regexp, longest);
105     int ngroup = re.NumberOfCapturingGroups()+1;
106     StringPiece* group = new StringPiece[ngroup];
107 
108     strgen_.Reset();
109     while (strgen_.HasNext()) {
110       StringPiece input = strgen_.Next();
111       PrintResult(re, input, RE2::ANCHOR_BOTH, group, ngroup);
112       printf(";");
113       PrintResult(re, input, RE2::UNANCHORED, group, ngroup);
114       printf(";");
115       PrintResult(relongest, input, RE2::ANCHOR_BOTH, group, ngroup);
116       printf(";");
117       PrintResult(relongest, input, RE2::UNANCHORED, group, ngroup);
118       printf("\n");
119     }
120     delete[] group;
121     return;
122   }
123 
124   Tester tester(regexp);
125   if (tester.error())
126     return;
127 
128   strgen_.Reset();
129   strgen_.GenerateNULL();
130   if (randomstrings_)
131     strgen_.Random(stringseed_, stringcount_);
132   int bad_inputs = 0;
133   while (strgen_.HasNext()) {
134     tests_++;
135     if (!tester.TestInput(strgen_.Next())) {
136       failures_++;
137       if (++bad_inputs >= FLAGS_max_bad_regexp_inputs)
138         break;
139     }
140   }
141 }
142 
143 // Runs an exhaustive test on the given parameters.
ExhaustiveTest(int maxatoms,int maxops,const std::vector<string> & alphabet,const std::vector<string> & ops,int maxstrlen,const std::vector<string> & stralphabet,const string & wrapper,const string & topwrapper)144 void ExhaustiveTest(int maxatoms, int maxops,
145                     const std::vector<string>& alphabet,
146                     const std::vector<string>& ops,
147                     int maxstrlen,
148                     const std::vector<string>& stralphabet,
149                     const string& wrapper,
150                     const string& topwrapper) {
151   if (RE2_DEBUG_MODE) {
152     if (maxatoms > 1)
153       maxatoms--;
154     if (maxops > 1)
155       maxops--;
156     if (maxstrlen > 1)
157       maxstrlen--;
158   }
159   ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
160                      maxstrlen, stralphabet, wrapper,
161                      topwrapper);
162   t.Generate();
163   if (!LOGGING) {
164     printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
165            t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
166   }
167   EXPECT_EQ(0, t.failures());
168 }
169 
170 // Runs an exhaustive test using the given parameters and
171 // the basic egrep operators.
EgrepTest(int maxatoms,int maxops,const string & alphabet,int maxstrlen,const string & stralphabet,const string & wrapper)172 void EgrepTest(int maxatoms, int maxops, const string& alphabet,
173                int maxstrlen, const string& stralphabet,
174                const string& wrapper) {
175   const char* tops[] = { "", "^(?:%s)", "(?:%s)$", "^(?:%s)$" };
176 
177   for (int i = 0; i < arraysize(tops); i++) {
178     ExhaustiveTest(maxatoms, maxops,
179                    Split("", alphabet),
180                    RegexpGenerator::EgrepOps(),
181                    maxstrlen,
182                    Split("", stralphabet),
183                    wrapper,
184                    tops[i]);
185   }
186 }
187 
188 }  // namespace re2
189