xref: /aosp_15_r20/external/regex-re2/re2/testing/regexp_benchmark.cc (revision ccdc9c3e24c519bfa4832a66aa2e83a52c19f295)
1 // Copyright 2006-2008 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 // Benchmarks for regular expression implementations.
6 
7 #include <stdint.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string>
11 #include <utility>
12 
13 #include "util/test.h"
14 #include "util/logging.h"
15 #include "util/strutil.h"
16 #include "re2/prog.h"
17 #include "re2/re2.h"
18 #include "re2/regexp.h"
19 #include "util/pcre.h"
20 #include "util/benchmark.h"
21 
22 namespace re2 {
23 void Test();
24 void MemoryUsage();
25 }  // namespace re2
26 
27 typedef testing::MallocCounter MallocCounter;
28 
29 namespace re2 {
30 
Test()31 void Test() {
32   Regexp* re = Regexp::Parse("(\\d+)-(\\d+)-(\\d+)", Regexp::LikePerl, NULL);
33   CHECK(re);
34   Prog* prog = re->CompileToProg(0);
35   CHECK(prog);
36   CHECK(prog->IsOnePass());
37   const char* text = "650-253-0001";
38   StringPiece sp[4];
39   CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
40   CHECK_EQ(sp[0], "650-253-0001");
41   CHECK_EQ(sp[1], "650");
42   CHECK_EQ(sp[2], "253");
43   CHECK_EQ(sp[3], "0001");
44   delete prog;
45   re->Decref();
46   LOG(INFO) << "test passed\n";
47 }
48 
MemoryUsage()49 void MemoryUsage() {
50   const char* regexp = "(\\d+)-(\\d+)-(\\d+)";
51   const char* text = "650-253-0001";
52   {
53     MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
54     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
55     CHECK(re);
56     // Can't pass mc.HeapGrowth() and mc.PeakHeapGrowth() to LOG(INFO) directly,
57     // because LOG(INFO) might do a big allocation before they get evaluated.
58     fprintf(stderr, "Regexp: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
59     mc.Reset();
60 
61     Prog* prog = re->CompileToProg(0);
62     CHECK(prog);
63     CHECK(prog->IsOnePass());
64     fprintf(stderr, "Prog:   %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
65     mc.Reset();
66 
67     StringPiece sp[4];
68     CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
69     fprintf(stderr, "Search: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
70     delete prog;
71     re->Decref();
72   }
73 
74   {
75     MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
76 
77     PCRE re(regexp, PCRE::UTF8);
78     fprintf(stderr, "RE:     %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
79     PCRE::FullMatch(text, re);
80     fprintf(stderr, "RE:     %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
81   }
82 
83   {
84     MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
85 
86     PCRE* re = new PCRE(regexp, PCRE::UTF8);
87     fprintf(stderr, "PCRE*:  %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
88     PCRE::FullMatch(text, *re);
89     fprintf(stderr, "PCRE*:  %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
90     delete re;
91   }
92 
93   {
94     MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
95 
96     RE2 re(regexp);
97     fprintf(stderr, "RE2:    %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
98     RE2::FullMatch(text, re);
99     fprintf(stderr, "RE2:    %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
100   }
101 
102   fprintf(stderr, "sizeof: PCRE=%zd RE2=%zd Prog=%zd Inst=%zd\n",
103           sizeof(PCRE), sizeof(RE2), sizeof(Prog), sizeof(Prog::Inst));
104 }
105 
106 // Regular expression implementation wrappers.
107 // Defined at bottom of file, but they are repetitive
108 // and not interesting.
109 
110 typedef void SearchImpl(int iters, const char* regexp, const StringPiece& text,
111              Prog::Anchor anchor, bool expect_match);
112 
113 SearchImpl SearchDFA, SearchNFA, SearchOnePass, SearchBitState,
114            SearchPCRE, SearchRE2,
115            SearchCachedDFA, SearchCachedNFA, SearchCachedOnePass, SearchCachedBitState,
116            SearchCachedPCRE, SearchCachedRE2;
117 
118 typedef void ParseImpl(int iters, const char* regexp, const StringPiece& text);
119 
120 ParseImpl Parse1NFA, Parse1OnePass, Parse1BitState,
121           Parse1PCRE, Parse1RE2,
122           Parse1Backtrack,
123           Parse1CachedNFA, Parse1CachedOnePass, Parse1CachedBitState,
124           Parse1CachedPCRE, Parse1CachedRE2,
125           Parse1CachedBacktrack;
126 
127 ParseImpl Parse3NFA, Parse3OnePass, Parse3BitState,
128           Parse3PCRE, Parse3RE2,
129           Parse3Backtrack,
130           Parse3CachedNFA, Parse3CachedOnePass, Parse3CachedBitState,
131           Parse3CachedPCRE, Parse3CachedRE2,
132           Parse3CachedBacktrack;
133 
134 ParseImpl SearchParse2CachedPCRE, SearchParse2CachedRE2;
135 
136 ParseImpl SearchParse1CachedPCRE, SearchParse1CachedRE2;
137 
138 // Benchmark: failed search for regexp in random text.
139 
140 // Generate random text that won't contain the search string,
141 // to test worst-case search behavior.
MakeText(string * text,int nbytes)142 void MakeText(string* text, int nbytes) {
143   srand(1);
144   text->resize(nbytes);
145   for (int i = 0; i < nbytes; i++) {
146     // Generate a one-byte rune that isn't a control character (e.g. '\n').
147     // Clipping to 0x20 introduces some bias, but we don't need uniformity.
148     int byte = rand() & 0x7F;
149     if (byte < 0x20)
150       byte = 0x20;
151     (*text)[i] = byte;
152   }
153 }
154 
155 // Makes text of size nbytes, then calls run to search
156 // the text for regexp iters times.
Search(int iters,int nbytes,const char * regexp,SearchImpl * search)157 void Search(int iters, int nbytes, const char* regexp, SearchImpl* search) {
158   StopBenchmarkTiming();
159   string s;
160   MakeText(&s, nbytes);
161   BenchmarkMemoryUsage();
162   StartBenchmarkTiming();
163   search(iters, regexp, s, Prog::kUnanchored, false);
164   SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*nbytes);
165 }
166 
167 // These two are easy because they start with an A,
168 // giving the search loop something to memchr for.
169 #define EASY0      "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
170 #define EASY1      "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
171 
172 // This is a little harder, since it starts with a character class
173 // and thus can't be memchr'ed.  Could look for ABC and work backward,
174 // but no one does that.
175 #define MEDIUM     "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
176 
177 // This is a fair amount harder, because of the leading [ -~]*.
178 // A bad backtracking implementation will take O(text^2) time to
179 // figure out there's no match.
180 #define HARD       "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
181 
182 // This has quite a high degree of fanout.
183 // NFA execution will be particularly slow.
184 #define FANOUT     "(?:[\\x{80}-\\x{10FFFF}]?){100}[\\x{80}-\\x{10FFFF}]"
185 
186 // This stresses engines that are trying to track parentheses.
187 #define PARENS     "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" \
188                    "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
189 
Search_Easy0_CachedDFA(int i,int n)190 void Search_Easy0_CachedDFA(int i, int n)     { Search(i, n, EASY0, SearchCachedDFA); }
Search_Easy0_CachedNFA(int i,int n)191 void Search_Easy0_CachedNFA(int i, int n)     { Search(i, n, EASY0, SearchCachedNFA); }
Search_Easy0_CachedPCRE(int i,int n)192 void Search_Easy0_CachedPCRE(int i, int n)    { Search(i, n, EASY0, SearchCachedPCRE); }
Search_Easy0_CachedRE2(int i,int n)193 void Search_Easy0_CachedRE2(int i, int n)     { Search(i, n, EASY0, SearchCachedRE2); }
194 
195 BENCHMARK_RANGE(Search_Easy0_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
196 BENCHMARK_RANGE(Search_Easy0_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
197 #ifdef USEPCRE
198 BENCHMARK_RANGE(Search_Easy0_CachedPCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
199 #endif
200 BENCHMARK_RANGE(Search_Easy0_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
201 
Search_Easy1_CachedDFA(int i,int n)202 void Search_Easy1_CachedDFA(int i, int n)     { Search(i, n, EASY1, SearchCachedDFA); }
Search_Easy1_CachedNFA(int i,int n)203 void Search_Easy1_CachedNFA(int i, int n)     { Search(i, n, EASY1, SearchCachedNFA); }
Search_Easy1_CachedPCRE(int i,int n)204 void Search_Easy1_CachedPCRE(int i, int n)    { Search(i, n, EASY1, SearchCachedPCRE); }
Search_Easy1_CachedRE2(int i,int n)205 void Search_Easy1_CachedRE2(int i, int n)     { Search(i, n, EASY1, SearchCachedRE2); }
206 
207 BENCHMARK_RANGE(Search_Easy1_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
208 BENCHMARK_RANGE(Search_Easy1_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
209 #ifdef USEPCRE
210 BENCHMARK_RANGE(Search_Easy1_CachedPCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
211 #endif
212 BENCHMARK_RANGE(Search_Easy1_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
213 
Search_Medium_CachedDFA(int i,int n)214 void Search_Medium_CachedDFA(int i, int n)     { Search(i, n, MEDIUM, SearchCachedDFA); }
Search_Medium_CachedNFA(int i,int n)215 void Search_Medium_CachedNFA(int i, int n)     { Search(i, n, MEDIUM, SearchCachedNFA); }
Search_Medium_CachedPCRE(int i,int n)216 void Search_Medium_CachedPCRE(int i, int n)    { Search(i, n, MEDIUM, SearchCachedPCRE); }
Search_Medium_CachedRE2(int i,int n)217 void Search_Medium_CachedRE2(int i, int n)     { Search(i, n, MEDIUM, SearchCachedRE2); }
218 
219 BENCHMARK_RANGE(Search_Medium_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
220 BENCHMARK_RANGE(Search_Medium_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
221 #ifdef USEPCRE
222 BENCHMARK_RANGE(Search_Medium_CachedPCRE,    8, 256<<10)->ThreadRange(1, NumCPUs());
223 #endif
224 BENCHMARK_RANGE(Search_Medium_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
225 
Search_Hard_CachedDFA(int i,int n)226 void Search_Hard_CachedDFA(int i, int n)     { Search(i, n, HARD, SearchCachedDFA); }
Search_Hard_CachedNFA(int i,int n)227 void Search_Hard_CachedNFA(int i, int n)     { Search(i, n, HARD, SearchCachedNFA); }
Search_Hard_CachedPCRE(int i,int n)228 void Search_Hard_CachedPCRE(int i, int n)    { Search(i, n, HARD, SearchCachedPCRE); }
Search_Hard_CachedRE2(int i,int n)229 void Search_Hard_CachedRE2(int i, int n)     { Search(i, n, HARD, SearchCachedRE2); }
230 
231 BENCHMARK_RANGE(Search_Hard_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
232 BENCHMARK_RANGE(Search_Hard_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
233 #ifdef USEPCRE
234 BENCHMARK_RANGE(Search_Hard_CachedPCRE,    8, 4<<10)->ThreadRange(1, NumCPUs());
235 #endif
236 BENCHMARK_RANGE(Search_Hard_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
237 
Search_Fanout_CachedDFA(int i,int n)238 void Search_Fanout_CachedDFA(int i, int n)     { Search(i, n, FANOUT, SearchCachedDFA); }
Search_Fanout_CachedNFA(int i,int n)239 void Search_Fanout_CachedNFA(int i, int n)     { Search(i, n, FANOUT, SearchCachedNFA); }
Search_Fanout_CachedPCRE(int i,int n)240 void Search_Fanout_CachedPCRE(int i, int n)    { Search(i, n, FANOUT, SearchCachedPCRE); }
Search_Fanout_CachedRE2(int i,int n)241 void Search_Fanout_CachedRE2(int i, int n)     { Search(i, n, FANOUT, SearchCachedRE2); }
242 
243 BENCHMARK_RANGE(Search_Fanout_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
244 BENCHMARK_RANGE(Search_Fanout_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
245 #ifdef USEPCRE
246 BENCHMARK_RANGE(Search_Fanout_CachedPCRE,    8, 4<<10)->ThreadRange(1, NumCPUs());
247 #endif
248 BENCHMARK_RANGE(Search_Fanout_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
249 
Search_Parens_CachedDFA(int i,int n)250 void Search_Parens_CachedDFA(int i, int n)     { Search(i, n, PARENS, SearchCachedDFA); }
Search_Parens_CachedNFA(int i,int n)251 void Search_Parens_CachedNFA(int i, int n)     { Search(i, n, PARENS, SearchCachedNFA); }
Search_Parens_CachedPCRE(int i,int n)252 void Search_Parens_CachedPCRE(int i, int n)    { Search(i, n, PARENS, SearchCachedPCRE); }
Search_Parens_CachedRE2(int i,int n)253 void Search_Parens_CachedRE2(int i, int n)     { Search(i, n, PARENS, SearchCachedRE2); }
254 
255 BENCHMARK_RANGE(Search_Parens_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
256 BENCHMARK_RANGE(Search_Parens_CachedNFA,     8, 256<<10)->ThreadRange(1, NumCPUs());
257 #ifdef USEPCRE
258 BENCHMARK_RANGE(Search_Parens_CachedPCRE,    8, 8)->ThreadRange(1, NumCPUs());
259 #endif
260 BENCHMARK_RANGE(Search_Parens_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
261 
SearchBigFixed(int iters,int nbytes,SearchImpl * search)262 void SearchBigFixed(int iters, int nbytes, SearchImpl* search) {
263   StopBenchmarkTiming();
264   string s;
265   s.append(nbytes/2, 'x');
266   string regexp = "^" + s + ".*$";
267   string t;
268   MakeText(&t, nbytes/2);
269   s += t;
270   BenchmarkMemoryUsage();
271   StartBenchmarkTiming();
272   search(iters, regexp.c_str(), s, Prog::kUnanchored, true);
273   SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*nbytes);
274 }
275 
Search_BigFixed_CachedDFA(int i,int n)276 void Search_BigFixed_CachedDFA(int i, int n)     { SearchBigFixed(i, n, SearchCachedDFA); }
Search_BigFixed_CachedNFA(int i,int n)277 void Search_BigFixed_CachedNFA(int i, int n)     { SearchBigFixed(i, n, SearchCachedNFA); }
Search_BigFixed_CachedPCRE(int i,int n)278 void Search_BigFixed_CachedPCRE(int i, int n)    { SearchBigFixed(i, n, SearchCachedPCRE); }
Search_BigFixed_CachedRE2(int i,int n)279 void Search_BigFixed_CachedRE2(int i, int n)     { SearchBigFixed(i, n, SearchCachedRE2); }
280 
281 BENCHMARK_RANGE(Search_BigFixed_CachedDFA,     8, 1<<20)->ThreadRange(1, NumCPUs());
282 BENCHMARK_RANGE(Search_BigFixed_CachedNFA,     8, 32<<10)->ThreadRange(1, NumCPUs());
283 #ifdef USEPCRE
284 BENCHMARK_RANGE(Search_BigFixed_CachedPCRE,    8, 32<<10)->ThreadRange(1, NumCPUs());
285 #endif
286 BENCHMARK_RANGE(Search_BigFixed_CachedRE2,     8, 1<<20)->ThreadRange(1, NumCPUs());
287 
288 // Benchmark: FindAndConsume
289 
FindAndConsume(int iters,int nbytes)290 void FindAndConsume(int iters, int nbytes) {
291   StopBenchmarkTiming();
292   string s;
293   MakeText(&s, nbytes);
294   s.append("Hello World");
295   StartBenchmarkTiming();
296   RE2 re("((Hello World))");
297   for (int i = 0; i < iters; i++) {
298     StringPiece t = s;
299     StringPiece u;
300     CHECK(RE2::FindAndConsume(&t, re, &u));
301     CHECK_EQ(u, "Hello World");
302   }
303   SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*nbytes);
304 }
305 
306 BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs());
307 
308 // Benchmark: successful anchored search.
309 
SearchSuccess(int iters,int nbytes,const char * regexp,SearchImpl * search)310 void SearchSuccess(int iters, int nbytes, const char* regexp, SearchImpl* search) {
311   StopBenchmarkTiming();
312   string s;
313   MakeText(&s, nbytes);
314   BenchmarkMemoryUsage();
315   StartBenchmarkTiming();
316   search(iters, regexp, s, Prog::kAnchored, true);
317   SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*nbytes);
318 }
319 
320 // Unambiguous search (RE2 can use OnePass).
321 
Search_Success_DFA(int i,int n)322 void Search_Success_DFA(int i, int n)     { SearchSuccess(i, n, ".*$", SearchDFA); }
Search_Success_NFA(int i,int n)323 void Search_Success_NFA(int i, int n)     { SearchSuccess(i, n, ".*$", SearchNFA); }
Search_Success_PCRE(int i,int n)324 void Search_Success_PCRE(int i, int n)    { SearchSuccess(i, n, ".*$", SearchPCRE); }
Search_Success_RE2(int i,int n)325 void Search_Success_RE2(int i, int n)     { SearchSuccess(i, n, ".*$", SearchRE2); }
Search_Success_OnePass(int i,int n)326 void Search_Success_OnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchOnePass); }
327 
328 BENCHMARK_RANGE(Search_Success_DFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
329 BENCHMARK_RANGE(Search_Success_NFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
330 #ifdef USEPCRE
331 BENCHMARK_RANGE(Search_Success_PCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
332 #endif
333 BENCHMARK_RANGE(Search_Success_RE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
334 BENCHMARK_RANGE(Search_Success_OnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
335 
Search_Success_CachedDFA(int i,int n)336 void Search_Success_CachedDFA(int i, int n)     { SearchSuccess(i, n, ".*$", SearchCachedDFA); }
Search_Success_CachedNFA(int i,int n)337 void Search_Success_CachedNFA(int i, int n)     { SearchSuccess(i, n, ".*$", SearchCachedNFA); }
Search_Success_CachedPCRE(int i,int n)338 void Search_Success_CachedPCRE(int i, int n)    { SearchSuccess(i, n, ".*$", SearchCachedPCRE); }
Search_Success_CachedRE2(int i,int n)339 void Search_Success_CachedRE2(int i, int n)     { SearchSuccess(i, n, ".*$", SearchCachedRE2); }
Search_Success_CachedOnePass(int i,int n)340 void Search_Success_CachedOnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedOnePass); }
341 
342 BENCHMARK_RANGE(Search_Success_CachedDFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
343 BENCHMARK_RANGE(Search_Success_CachedNFA,     8, 16<<20)->ThreadRange(1, NumCPUs());
344 #ifdef USEPCRE
345 BENCHMARK_RANGE(Search_Success_CachedPCRE,    8, 16<<20)->ThreadRange(1, NumCPUs());
346 #endif
347 BENCHMARK_RANGE(Search_Success_CachedRE2,     8, 16<<20)->ThreadRange(1, NumCPUs());
348 BENCHMARK_RANGE(Search_Success_CachedOnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
349 
350 // Ambiguous search (RE2 cannot use OnePass).
351 // Used to be ".*.$", but that is coalesced to ".+$" these days.
352 
Search_Success1_DFA(int i,int n)353 void Search_Success1_DFA(int i, int n)      { SearchSuccess(i, n, ".*\\C$", SearchDFA); }
Search_Success1_NFA(int i,int n)354 void Search_Success1_NFA(int i, int n)      { SearchSuccess(i, n, ".*\\C$", SearchNFA); }
Search_Success1_PCRE(int i,int n)355 void Search_Success1_PCRE(int i, int n)     { SearchSuccess(i, n, ".*\\C$", SearchPCRE); }
Search_Success1_RE2(int i,int n)356 void Search_Success1_RE2(int i, int n)      { SearchSuccess(i, n, ".*\\C$", SearchRE2); }
Search_Success1_BitState(int i,int n)357 void Search_Success1_BitState(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchBitState); }
358 
359 BENCHMARK_RANGE(Search_Success1_DFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
360 BENCHMARK_RANGE(Search_Success1_NFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
361 #ifdef USEPCRE
362 BENCHMARK_RANGE(Search_Success1_PCRE,     8, 16<<20)->ThreadRange(1, NumCPUs());
363 #endif
364 BENCHMARK_RANGE(Search_Success1_RE2,      8, 16<<20)->ThreadRange(1, NumCPUs());
365 BENCHMARK_RANGE(Search_Success1_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
366 
Search_Success1_CachedDFA(int i,int n)367 void Search_Success1_CachedDFA(int i, int n)      { SearchSuccess(i, n, ".*\\C$", SearchCachedDFA); }
Search_Success1_CachedNFA(int i,int n)368 void Search_Success1_CachedNFA(int i, int n)      { SearchSuccess(i, n, ".*\\C$", SearchCachedNFA); }
Search_Success1_CachedPCRE(int i,int n)369 void Search_Success1_CachedPCRE(int i, int n)     { SearchSuccess(i, n, ".*\\C$", SearchCachedPCRE); }
Search_Success1_CachedRE2(int i,int n)370 void Search_Success1_CachedRE2(int i, int n)      { SearchSuccess(i, n, ".*\\C$", SearchCachedRE2); }
Search_Success1_CachedBitState(int i,int n)371 void Search_Success1_CachedBitState(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchCachedBitState); }
372 
373 BENCHMARK_RANGE(Search_Success1_CachedDFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
374 BENCHMARK_RANGE(Search_Success1_CachedNFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
375 #ifdef USEPCRE
376 BENCHMARK_RANGE(Search_Success1_CachedPCRE,     8, 16<<20)->ThreadRange(1, NumCPUs());
377 #endif
378 BENCHMARK_RANGE(Search_Success1_CachedRE2,      8, 16<<20)->ThreadRange(1, NumCPUs());
379 BENCHMARK_RANGE(Search_Success1_CachedBitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
380 
381 // Benchmark: AltMatch optimisation (just to verify that it works)
382 // Note that OnePass doesn't implement it!
383 
SearchAltMatch(int iters,int nbytes,SearchImpl * search)384 void SearchAltMatch(int iters, int nbytes, SearchImpl* search) {
385   StopBenchmarkTiming();
386   string s;
387   MakeText(&s, nbytes);
388   BenchmarkMemoryUsage();
389   StartBenchmarkTiming();
390   search(iters, "\\C*", s, Prog::kAnchored, true);
391   SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*nbytes);
392 }
393 
Search_AltMatch_DFA(int i,int n)394 void Search_AltMatch_DFA(int i, int n)      { SearchAltMatch(i, n, SearchDFA); }
Search_AltMatch_NFA(int i,int n)395 void Search_AltMatch_NFA(int i, int n)      { SearchAltMatch(i, n, SearchNFA); }
Search_AltMatch_OnePass(int i,int n)396 void Search_AltMatch_OnePass(int i, int n)  { SearchAltMatch(i, n, SearchOnePass); }
Search_AltMatch_BitState(int i,int n)397 void Search_AltMatch_BitState(int i, int n) { SearchAltMatch(i, n, SearchBitState); }
Search_AltMatch_PCRE(int i,int n)398 void Search_AltMatch_PCRE(int i, int n)     { SearchAltMatch(i, n, SearchPCRE); }
Search_AltMatch_RE2(int i,int n)399 void Search_AltMatch_RE2(int i, int n)      { SearchAltMatch(i, n, SearchRE2); }
400 
401 BENCHMARK_RANGE(Search_AltMatch_DFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
402 BENCHMARK_RANGE(Search_AltMatch_NFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
403 BENCHMARK_RANGE(Search_AltMatch_OnePass,  8, 16<<20)->ThreadRange(1, NumCPUs());
404 BENCHMARK_RANGE(Search_AltMatch_BitState, 8, 16<<20)->ThreadRange(1, NumCPUs());
405 #ifdef USEPCRE
406 BENCHMARK_RANGE(Search_AltMatch_PCRE,     8, 16<<20)->ThreadRange(1, NumCPUs());
407 #endif
408 BENCHMARK_RANGE(Search_AltMatch_RE2,      8, 16<<20)->ThreadRange(1, NumCPUs());
409 
Search_AltMatch_CachedDFA(int i,int n)410 void Search_AltMatch_CachedDFA(int i, int n)      { SearchAltMatch(i, n, SearchCachedDFA); }
Search_AltMatch_CachedNFA(int i,int n)411 void Search_AltMatch_CachedNFA(int i, int n)      { SearchAltMatch(i, n, SearchCachedNFA); }
Search_AltMatch_CachedOnePass(int i,int n)412 void Search_AltMatch_CachedOnePass(int i, int n)  { SearchAltMatch(i, n, SearchCachedOnePass); }
Search_AltMatch_CachedBitState(int i,int n)413 void Search_AltMatch_CachedBitState(int i, int n) { SearchAltMatch(i, n, SearchCachedBitState); }
Search_AltMatch_CachedPCRE(int i,int n)414 void Search_AltMatch_CachedPCRE(int i, int n)     { SearchAltMatch(i, n, SearchCachedPCRE); }
Search_AltMatch_CachedRE2(int i,int n)415 void Search_AltMatch_CachedRE2(int i, int n)      { SearchAltMatch(i, n, SearchCachedRE2); }
416 
417 BENCHMARK_RANGE(Search_AltMatch_CachedDFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
418 BENCHMARK_RANGE(Search_AltMatch_CachedNFA,      8, 16<<20)->ThreadRange(1, NumCPUs());
419 BENCHMARK_RANGE(Search_AltMatch_CachedOnePass,  8, 16<<20)->ThreadRange(1, NumCPUs());
420 BENCHMARK_RANGE(Search_AltMatch_CachedBitState, 8, 16<<20)->ThreadRange(1, NumCPUs());
421 #ifdef USEPCRE
422 BENCHMARK_RANGE(Search_AltMatch_CachedPCRE,     8, 16<<20)->ThreadRange(1, NumCPUs());
423 #endif
424 BENCHMARK_RANGE(Search_AltMatch_CachedRE2,      8, 16<<20)->ThreadRange(1, NumCPUs());
425 
426 // Benchmark: use regexp to find phone number.
427 
SearchDigits(int iters,SearchImpl * search)428 void SearchDigits(int iters, SearchImpl* search) {
429   StringPiece s("650-253-0001");
430   BenchmarkMemoryUsage();
431   search(iters, "([0-9]+)-([0-9]+)-([0-9]+)", s, Prog::kAnchored, true);
432   SetBenchmarkItemsProcessed(iters);
433 }
434 
Search_Digits_DFA(int i)435 void Search_Digits_DFA(int i)         { SearchDigits(i, SearchDFA); }
Search_Digits_NFA(int i)436 void Search_Digits_NFA(int i)         { SearchDigits(i, SearchNFA); }
Search_Digits_OnePass(int i)437 void Search_Digits_OnePass(int i)     { SearchDigits(i, SearchOnePass); }
Search_Digits_PCRE(int i)438 void Search_Digits_PCRE(int i)        { SearchDigits(i, SearchPCRE); }
Search_Digits_RE2(int i)439 void Search_Digits_RE2(int i)         { SearchDigits(i, SearchRE2); }
Search_Digits_BitState(int i)440 void Search_Digits_BitState(int i)         { SearchDigits(i, SearchBitState); }
441 
442 BENCHMARK(Search_Digits_DFA)->ThreadRange(1, NumCPUs());
443 BENCHMARK(Search_Digits_NFA)->ThreadRange(1, NumCPUs());
444 BENCHMARK(Search_Digits_OnePass)->ThreadRange(1, NumCPUs());
445 #ifdef USEPCRE
446 BENCHMARK(Search_Digits_PCRE)->ThreadRange(1, NumCPUs());
447 #endif
448 BENCHMARK(Search_Digits_RE2)->ThreadRange(1, NumCPUs());
449 BENCHMARK(Search_Digits_BitState)->ThreadRange(1, NumCPUs());
450 
451 // Benchmark: use regexp to parse digit fields in phone number.
452 
Parse3Digits(int iters,void (* parse3)(int,const char *,const StringPiece &))453 void Parse3Digits(int iters,
454                void (*parse3)(int, const char*, const StringPiece&)) {
455   BenchmarkMemoryUsage();
456   parse3(iters, "([0-9]+)-([0-9]+)-([0-9]+)", "650-253-0001");
457   SetBenchmarkItemsProcessed(iters);
458 }
459 
Parse_Digits_NFA(int i)460 void Parse_Digits_NFA(int i)         { Parse3Digits(i, Parse3NFA); }
Parse_Digits_OnePass(int i)461 void Parse_Digits_OnePass(int i)     { Parse3Digits(i, Parse3OnePass); }
Parse_Digits_PCRE(int i)462 void Parse_Digits_PCRE(int i)        { Parse3Digits(i, Parse3PCRE); }
Parse_Digits_RE2(int i)463 void Parse_Digits_RE2(int i)         { Parse3Digits(i, Parse3RE2); }
Parse_Digits_Backtrack(int i)464 void Parse_Digits_Backtrack(int i)   { Parse3Digits(i, Parse3Backtrack); }
Parse_Digits_BitState(int i)465 void Parse_Digits_BitState(int i)   { Parse3Digits(i, Parse3BitState); }
466 
467 BENCHMARK(Parse_Digits_NFA)->ThreadRange(1, NumCPUs());
468 BENCHMARK(Parse_Digits_OnePass)->ThreadRange(1, NumCPUs());
469 #ifdef USEPCRE
470 BENCHMARK(Parse_Digits_PCRE)->ThreadRange(1, NumCPUs());
471 #endif
472 BENCHMARK(Parse_Digits_RE2)->ThreadRange(1, NumCPUs());
473 BENCHMARK(Parse_Digits_Backtrack)->ThreadRange(1, NumCPUs());
474 BENCHMARK(Parse_Digits_BitState)->ThreadRange(1, NumCPUs());
475 
Parse_CachedDigits_NFA(int i)476 void Parse_CachedDigits_NFA(int i)         { Parse3Digits(i, Parse3CachedNFA); }
Parse_CachedDigits_OnePass(int i)477 void Parse_CachedDigits_OnePass(int i)     { Parse3Digits(i, Parse3CachedOnePass); }
Parse_CachedDigits_PCRE(int i)478 void Parse_CachedDigits_PCRE(int i)        { Parse3Digits(i, Parse3CachedPCRE); }
Parse_CachedDigits_RE2(int i)479 void Parse_CachedDigits_RE2(int i)         { Parse3Digits(i, Parse3CachedRE2); }
Parse_CachedDigits_Backtrack(int i)480 void Parse_CachedDigits_Backtrack(int i)   { Parse3Digits(i, Parse3CachedBacktrack); }
Parse_CachedDigits_BitState(int i)481 void Parse_CachedDigits_BitState(int i)   { Parse3Digits(i, Parse3CachedBitState); }
482 
483 BENCHMARK(Parse_CachedDigits_NFA)->ThreadRange(1, NumCPUs());
484 BENCHMARK(Parse_CachedDigits_OnePass)->ThreadRange(1, NumCPUs());
485 #ifdef USEPCRE
486 BENCHMARK(Parse_CachedDigits_PCRE)->ThreadRange(1, NumCPUs());
487 #endif
488 BENCHMARK(Parse_CachedDigits_Backtrack)->ThreadRange(1, NumCPUs());
489 BENCHMARK(Parse_CachedDigits_RE2)->ThreadRange(1, NumCPUs());
490 BENCHMARK(Parse_CachedDigits_BitState)->ThreadRange(1, NumCPUs());
491 
Parse3DigitDs(int iters,void (* parse3)(int,const char *,const StringPiece &))492 void Parse3DigitDs(int iters,
493                void (*parse3)(int, const char*, const StringPiece&)) {
494   BenchmarkMemoryUsage();
495   parse3(iters, "(\\d+)-(\\d+)-(\\d+)", "650-253-0001");
496   SetBenchmarkItemsProcessed(iters);
497 }
498 
Parse_DigitDs_NFA(int i)499 void Parse_DigitDs_NFA(int i)         { Parse3DigitDs(i, Parse3NFA); }
Parse_DigitDs_OnePass(int i)500 void Parse_DigitDs_OnePass(int i)     { Parse3DigitDs(i, Parse3OnePass); }
Parse_DigitDs_PCRE(int i)501 void Parse_DigitDs_PCRE(int i)        { Parse3DigitDs(i, Parse3PCRE); }
Parse_DigitDs_RE2(int i)502 void Parse_DigitDs_RE2(int i)         { Parse3DigitDs(i, Parse3RE2); }
Parse_DigitDs_Backtrack(int i)503 void Parse_DigitDs_Backtrack(int i)   { Parse3DigitDs(i, Parse3CachedBacktrack); }
Parse_DigitDs_BitState(int i)504 void Parse_DigitDs_BitState(int i)   { Parse3DigitDs(i, Parse3CachedBitState); }
505 
506 BENCHMARK(Parse_DigitDs_NFA)->ThreadRange(1, NumCPUs());
507 BENCHMARK(Parse_DigitDs_OnePass)->ThreadRange(1, NumCPUs());
508 #ifdef USEPCRE
509 BENCHMARK(Parse_DigitDs_PCRE)->ThreadRange(1, NumCPUs());
510 #endif
511 BENCHMARK(Parse_DigitDs_RE2)->ThreadRange(1, NumCPUs());
512 BENCHMARK(Parse_DigitDs_Backtrack)->ThreadRange(1, NumCPUs());
513 BENCHMARK(Parse_DigitDs_BitState)->ThreadRange(1, NumCPUs());
514 
Parse_CachedDigitDs_NFA(int i)515 void Parse_CachedDigitDs_NFA(int i)         { Parse3DigitDs(i, Parse3CachedNFA); }
Parse_CachedDigitDs_OnePass(int i)516 void Parse_CachedDigitDs_OnePass(int i)     { Parse3DigitDs(i, Parse3CachedOnePass); }
Parse_CachedDigitDs_PCRE(int i)517 void Parse_CachedDigitDs_PCRE(int i)        { Parse3DigitDs(i, Parse3CachedPCRE); }
Parse_CachedDigitDs_RE2(int i)518 void Parse_CachedDigitDs_RE2(int i)         { Parse3DigitDs(i, Parse3CachedRE2); }
Parse_CachedDigitDs_Backtrack(int i)519 void Parse_CachedDigitDs_Backtrack(int i)   { Parse3DigitDs(i, Parse3CachedBacktrack); }
Parse_CachedDigitDs_BitState(int i)520 void Parse_CachedDigitDs_BitState(int i)   { Parse3DigitDs(i, Parse3CachedBitState); }
521 
522 BENCHMARK(Parse_CachedDigitDs_NFA)->ThreadRange(1, NumCPUs());
523 BENCHMARK(Parse_CachedDigitDs_OnePass)->ThreadRange(1, NumCPUs());
524 #ifdef USEPCRE
525 BENCHMARK(Parse_CachedDigitDs_PCRE)->ThreadRange(1, NumCPUs());
526 #endif
527 BENCHMARK(Parse_CachedDigitDs_Backtrack)->ThreadRange(1, NumCPUs());
528 BENCHMARK(Parse_CachedDigitDs_RE2)->ThreadRange(1, NumCPUs());
529 BENCHMARK(Parse_CachedDigitDs_BitState)->ThreadRange(1, NumCPUs());
530 
531 // Benchmark: splitting off leading number field.
532 
Parse1Split(int iters,void (* parse1)(int,const char *,const StringPiece &))533 void Parse1Split(int iters,
534               void (*parse1)(int, const char*, const StringPiece&)) {
535   BenchmarkMemoryUsage();
536   parse1(iters, "[0-9]+-(.*)", "650-253-0001");
537   SetBenchmarkItemsProcessed(iters);
538 }
539 
Parse_Split_NFA(int i)540 void Parse_Split_NFA(int i)         { Parse1Split(i, Parse1NFA); }
Parse_Split_OnePass(int i)541 void Parse_Split_OnePass(int i)     { Parse1Split(i, Parse1OnePass); }
Parse_Split_PCRE(int i)542 void Parse_Split_PCRE(int i)        { Parse1Split(i, Parse1PCRE); }
Parse_Split_RE2(int i)543 void Parse_Split_RE2(int i)         { Parse1Split(i, Parse1RE2); }
Parse_Split_BitState(int i)544 void Parse_Split_BitState(int i)         { Parse1Split(i, Parse1BitState); }
545 
546 BENCHMARK(Parse_Split_NFA)->ThreadRange(1, NumCPUs());
547 BENCHMARK(Parse_Split_OnePass)->ThreadRange(1, NumCPUs());
548 #ifdef USEPCRE
549 BENCHMARK(Parse_Split_PCRE)->ThreadRange(1, NumCPUs());
550 #endif
551 BENCHMARK(Parse_Split_RE2)->ThreadRange(1, NumCPUs());
552 BENCHMARK(Parse_Split_BitState)->ThreadRange(1, NumCPUs());
553 
Parse_CachedSplit_NFA(int i)554 void Parse_CachedSplit_NFA(int i)         { Parse1Split(i, Parse1CachedNFA); }
Parse_CachedSplit_OnePass(int i)555 void Parse_CachedSplit_OnePass(int i)     { Parse1Split(i, Parse1CachedOnePass); }
Parse_CachedSplit_PCRE(int i)556 void Parse_CachedSplit_PCRE(int i)        { Parse1Split(i, Parse1CachedPCRE); }
Parse_CachedSplit_RE2(int i)557 void Parse_CachedSplit_RE2(int i)         { Parse1Split(i, Parse1CachedRE2); }
Parse_CachedSplit_BitState(int i)558 void Parse_CachedSplit_BitState(int i)         { Parse1Split(i, Parse1CachedBitState); }
559 
560 BENCHMARK(Parse_CachedSplit_NFA)->ThreadRange(1, NumCPUs());
561 BENCHMARK(Parse_CachedSplit_OnePass)->ThreadRange(1, NumCPUs());
562 #ifdef USEPCRE
563 BENCHMARK(Parse_CachedSplit_PCRE)->ThreadRange(1, NumCPUs());
564 #endif
565 BENCHMARK(Parse_CachedSplit_RE2)->ThreadRange(1, NumCPUs());
566 BENCHMARK(Parse_CachedSplit_BitState)->ThreadRange(1, NumCPUs());
567 
568 // Benchmark: splitting off leading number field but harder (ambiguous regexp).
569 
Parse1SplitHard(int iters,void (* run)(int,const char *,const StringPiece &))570 void Parse1SplitHard(int iters,
571                   void (*run)(int, const char*, const StringPiece&)) {
572   BenchmarkMemoryUsage();
573   run(iters, "[0-9]+.(.*)", "650-253-0001");
574   SetBenchmarkItemsProcessed(iters);
575 }
576 
Parse_SplitHard_NFA(int i)577 void Parse_SplitHard_NFA(int i)         { Parse1SplitHard(i, Parse1NFA); }
Parse_SplitHard_PCRE(int i)578 void Parse_SplitHard_PCRE(int i)        { Parse1SplitHard(i, Parse1PCRE); }
Parse_SplitHard_RE2(int i)579 void Parse_SplitHard_RE2(int i)         { Parse1SplitHard(i, Parse1RE2); }
Parse_SplitHard_BitState(int i)580 void Parse_SplitHard_BitState(int i)         { Parse1SplitHard(i, Parse1BitState); }
581 
582 #ifdef USEPCRE
583 BENCHMARK(Parse_SplitHard_PCRE)->ThreadRange(1, NumCPUs());
584 #endif
585 BENCHMARK(Parse_SplitHard_RE2)->ThreadRange(1, NumCPUs());
586 BENCHMARK(Parse_SplitHard_BitState)->ThreadRange(1, NumCPUs());
587 BENCHMARK(Parse_SplitHard_NFA)->ThreadRange(1, NumCPUs());
588 
Parse_CachedSplitHard_NFA(int i)589 void Parse_CachedSplitHard_NFA(int i)       { Parse1SplitHard(i, Parse1CachedNFA); }
Parse_CachedSplitHard_PCRE(int i)590 void Parse_CachedSplitHard_PCRE(int i)      { Parse1SplitHard(i, Parse1CachedPCRE); }
Parse_CachedSplitHard_RE2(int i)591 void Parse_CachedSplitHard_RE2(int i)       { Parse1SplitHard(i, Parse1CachedRE2); }
Parse_CachedSplitHard_BitState(int i)592 void Parse_CachedSplitHard_BitState(int i)       { Parse1SplitHard(i, Parse1CachedBitState); }
Parse_CachedSplitHard_Backtrack(int i)593 void Parse_CachedSplitHard_Backtrack(int i)       { Parse1SplitHard(i, Parse1CachedBacktrack); }
594 
595 #ifdef USEPCRE
596 BENCHMARK(Parse_CachedSplitHard_PCRE)->ThreadRange(1, NumCPUs());
597 #endif
598 BENCHMARK(Parse_CachedSplitHard_RE2)->ThreadRange(1, NumCPUs());
599 BENCHMARK(Parse_CachedSplitHard_BitState)->ThreadRange(1, NumCPUs());
600 BENCHMARK(Parse_CachedSplitHard_NFA)->ThreadRange(1, NumCPUs());
601 BENCHMARK(Parse_CachedSplitHard_Backtrack)->ThreadRange(1, NumCPUs());
602 
603 // Benchmark: Parse1SplitHard, big text, small match.
604 
Parse1SplitBig1(int iters,void (* run)(int,const char *,const StringPiece &))605 void Parse1SplitBig1(int iters,
606                   void (*run)(int, const char*, const StringPiece&)) {
607   string s;
608   s.append(100000, 'x');
609   s.append("650-253-0001");
610   BenchmarkMemoryUsage();
611   run(iters, "[0-9]+.(.*)", s);
612   SetBenchmarkItemsProcessed(iters);
613 }
614 
Parse_CachedSplitBig1_PCRE(int i)615 void Parse_CachedSplitBig1_PCRE(int i)      { Parse1SplitBig1(i, SearchParse1CachedPCRE); }
Parse_CachedSplitBig1_RE2(int i)616 void Parse_CachedSplitBig1_RE2(int i)       { Parse1SplitBig1(i, SearchParse1CachedRE2); }
617 
618 #ifdef USEPCRE
619 BENCHMARK(Parse_CachedSplitBig1_PCRE)->ThreadRange(1, NumCPUs());
620 #endif
621 BENCHMARK(Parse_CachedSplitBig1_RE2)->ThreadRange(1, NumCPUs());
622 
623 // Benchmark: Parse1SplitHard, big text, big match.
624 
Parse1SplitBig2(int iters,void (* run)(int,const char *,const StringPiece &))625 void Parse1SplitBig2(int iters,
626                   void (*run)(int, const char*, const StringPiece&)) {
627   string s;
628   s.append("650-253-");
629   s.append(100000, '0');
630   BenchmarkMemoryUsage();
631   run(iters, "[0-9]+.(.*)", s);
632   SetBenchmarkItemsProcessed(iters);
633 }
634 
Parse_CachedSplitBig2_PCRE(int i)635 void Parse_CachedSplitBig2_PCRE(int i)      { Parse1SplitBig2(i, SearchParse1CachedPCRE); }
Parse_CachedSplitBig2_RE2(int i)636 void Parse_CachedSplitBig2_RE2(int i)       { Parse1SplitBig2(i, SearchParse1CachedRE2); }
637 
638 #ifdef USEPCRE
639 BENCHMARK(Parse_CachedSplitBig2_PCRE)->ThreadRange(1, NumCPUs());
640 #endif
641 BENCHMARK(Parse_CachedSplitBig2_RE2)->ThreadRange(1, NumCPUs());
642 
643 // Benchmark: measure time required to parse (but not execute)
644 // a simple regular expression.
645 
ParseRegexp(int iters,const string & regexp)646 void ParseRegexp(int iters, const string& regexp) {
647   for (int i = 0; i < iters; i++) {
648     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
649     CHECK(re);
650     re->Decref();
651   }
652 }
653 
SimplifyRegexp(int iters,const string & regexp)654 void SimplifyRegexp(int iters, const string& regexp) {
655   for (int i = 0; i < iters; i++) {
656     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
657     CHECK(re);
658     Regexp* sre = re->Simplify();
659     CHECK(sre);
660     sre->Decref();
661     re->Decref();
662   }
663 }
664 
NullWalkRegexp(int iters,const string & regexp)665 void NullWalkRegexp(int iters, const string& regexp) {
666   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
667   CHECK(re);
668   for (int i = 0; i < iters; i++) {
669     re->NullWalk();
670   }
671   re->Decref();
672 }
673 
SimplifyCompileRegexp(int iters,const string & regexp)674 void SimplifyCompileRegexp(int iters, const string& regexp) {
675   for (int i = 0; i < iters; i++) {
676     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
677     CHECK(re);
678     Regexp* sre = re->Simplify();
679     CHECK(sre);
680     Prog* prog = sre->CompileToProg(0);
681     CHECK(prog);
682     delete prog;
683     sre->Decref();
684     re->Decref();
685   }
686 }
687 
CompileRegexp(int iters,const string & regexp)688 void CompileRegexp(int iters, const string& regexp) {
689   for (int i = 0; i < iters; i++) {
690     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
691     CHECK(re);
692     Prog* prog = re->CompileToProg(0);
693     CHECK(prog);
694     delete prog;
695     re->Decref();
696   }
697 }
698 
CompileToProg(int iters,const string & regexp)699 void CompileToProg(int iters, const string& regexp) {
700   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
701   CHECK(re);
702   for (int i = 0; i < iters; i++) {
703     Prog* prog = re->CompileToProg(0);
704     CHECK(prog);
705     delete prog;
706   }
707   re->Decref();
708 }
709 
CompileByteMap(int iters,const string & regexp)710 void CompileByteMap(int iters, const string& regexp) {
711   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
712   CHECK(re);
713   Prog* prog = re->CompileToProg(0);
714   CHECK(prog);
715   for (int i = 0; i < iters; i++) {
716     prog->ComputeByteMap();
717   }
718   delete prog;
719   re->Decref();
720 }
721 
CompilePCRE(int iters,const string & regexp)722 void CompilePCRE(int iters, const string& regexp) {
723   for (int i = 0; i < iters; i++) {
724     PCRE re(regexp, PCRE::UTF8);
725     CHECK_EQ(re.error(), "");
726   }
727 }
728 
CompileRE2(int iters,const string & regexp)729 void CompileRE2(int iters, const string& regexp) {
730   for (int i = 0; i < iters; i++) {
731     RE2 re(regexp);
732     CHECK_EQ(re.error(), "");
733   }
734 }
735 
RunBuild(int iters,const string & regexp,void (* run)(int,const string &))736 void RunBuild(int iters, const string& regexp, void (*run)(int, const string&)) {
737   run(iters, regexp);
738   SetBenchmarkItemsProcessed(iters);
739 }
740 
741 }  // namespace re2
742 
743 DEFINE_string(compile_regexp, "(.*)-(\\d+)-of-(\\d+)", "regexp for compile benchmarks");
744 
745 namespace re2 {
746 
BM_PCRE_Compile(int i)747 void BM_PCRE_Compile(int i)      { RunBuild(i, FLAGS_compile_regexp, CompilePCRE); }
BM_Regexp_Parse(int i)748 void BM_Regexp_Parse(int i)      { RunBuild(i, FLAGS_compile_regexp, ParseRegexp); }
BM_Regexp_Simplify(int i)749 void BM_Regexp_Simplify(int i)   { RunBuild(i, FLAGS_compile_regexp, SimplifyRegexp); }
BM_CompileToProg(int i)750 void BM_CompileToProg(int i)     { RunBuild(i, FLAGS_compile_regexp, CompileToProg); }
BM_CompileByteMap(int i)751 void BM_CompileByteMap(int i)     { RunBuild(i, FLAGS_compile_regexp, CompileByteMap); }
BM_Regexp_Compile(int i)752 void BM_Regexp_Compile(int i)    { RunBuild(i, FLAGS_compile_regexp, CompileRegexp); }
BM_Regexp_SimplifyCompile(int i)753 void BM_Regexp_SimplifyCompile(int i)   { RunBuild(i, FLAGS_compile_regexp, SimplifyCompileRegexp); }
BM_Regexp_NullWalk(int i)754 void BM_Regexp_NullWalk(int i)   { RunBuild(i, FLAGS_compile_regexp, NullWalkRegexp); }
BM_RE2_Compile(int i)755 void BM_RE2_Compile(int i)       { RunBuild(i, FLAGS_compile_regexp, CompileRE2); }
756 
757 #ifdef USEPCRE
758 BENCHMARK(BM_PCRE_Compile)->ThreadRange(1, NumCPUs());
759 #endif
760 BENCHMARK(BM_Regexp_Parse)->ThreadRange(1, NumCPUs());
761 BENCHMARK(BM_Regexp_Simplify)->ThreadRange(1, NumCPUs());
762 BENCHMARK(BM_CompileToProg)->ThreadRange(1, NumCPUs());
763 BENCHMARK(BM_CompileByteMap)->ThreadRange(1, NumCPUs());
764 BENCHMARK(BM_Regexp_Compile)->ThreadRange(1, NumCPUs());
765 BENCHMARK(BM_Regexp_SimplifyCompile)->ThreadRange(1, NumCPUs());
766 BENCHMARK(BM_Regexp_NullWalk)->ThreadRange(1, NumCPUs());
767 BENCHMARK(BM_RE2_Compile)->ThreadRange(1, NumCPUs());
768 
769 // Makes text of size nbytes, then calls run to search
770 // the text for regexp iters times.
SearchPhone(int iters,int nbytes,ParseImpl * search)771 void SearchPhone(int iters, int nbytes, ParseImpl* search) {
772   StopBenchmarkTiming();
773   string s;
774   MakeText(&s, nbytes);
775   s.append("(650) 253-0001");
776   BenchmarkMemoryUsage();
777   StartBenchmarkTiming();
778   search(iters, "(\\d{3}-|\\(\\d{3}\\)\\s+)(\\d{3}-\\d{4})", s);
779   SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*nbytes);
780 }
781 
SearchPhone_CachedPCRE(int i,int n)782 void SearchPhone_CachedPCRE(int i, int n) {
783   SearchPhone(i, n, SearchParse2CachedPCRE);
784 }
SearchPhone_CachedRE2(int i,int n)785 void SearchPhone_CachedRE2(int i, int n) {
786   SearchPhone(i, n, SearchParse2CachedRE2);
787 }
788 
789 #ifdef USEPCRE
790 BENCHMARK_RANGE(SearchPhone_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
791 #endif
792 BENCHMARK_RANGE(SearchPhone_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
793 
794 /*
795 TODO(rsc): Make this work again.
796 
797 // Generates and returns a string over binary alphabet {0,1} that contains
798 // all possible binary sequences of length n as subsequences.  The obvious
799 // brute force method would generate a string of length n * 2^n, but this
800 // generates a string of length n + 2^n - 1 called a De Bruijn cycle.
801 // See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
802 static string DeBruijnString(int n) {
803   CHECK_LT(n, 8*sizeof(int));
804   CHECK_GT(n, 0);
805 
806   std::vector<bool> did(1<<n);
807   for (int i = 0; i < 1<<n; i++)
808     did[i] = false;
809 
810   string s;
811   for (int i = 0; i < n-1; i++)
812     s.append("0");
813   int bits = 0;
814   int mask = (1<<n) - 1;
815   for (int i = 0; i < (1<<n); i++) {
816     bits <<= 1;
817     bits &= mask;
818     if (!did[bits|1]) {
819       bits |= 1;
820       s.append("1");
821     } else {
822       s.append("0");
823     }
824     CHECK(!did[bits]);
825     did[bits] = true;
826   }
827   return s;
828 }
829 
830 void CacheFill(int iters, int n, SearchImpl *srch) {
831   string s = DeBruijnString(n+1);
832   string t;
833   for (int i = n+1; i < 20; i++) {
834     t = s + s;
835     using std::swap;
836     swap(s, t);
837   }
838   srch(iters, StringPrintf("0[01]{%d}$", n).c_str(), s,
839        Prog::kUnanchored, true);
840   SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*s.size());
841 }
842 
843 void CacheFillPCRE(int i, int n) { CacheFill(i, n, SearchCachedPCRE); }
844 void CacheFillRE2(int i, int n)  { CacheFill(i, n, SearchCachedRE2); }
845 void CacheFillNFA(int i, int n)  { CacheFill(i, n, SearchCachedNFA); }
846 void CacheFillDFA(int i, int n)  { CacheFill(i, n, SearchCachedDFA); }
847 
848 // BENCHMARK_WITH_ARG uses __LINE__ to generate distinct identifiers
849 // for the static BenchmarkRegisterer, which makes it unusable inside
850 // a macro like DO24 below.  MY_BENCHMARK_WITH_ARG uses the argument a
851 // to make the identifiers distinct (only possible when 'a' is a simple
852 // expression like 2, not like 1+1).
853 #define MY_BENCHMARK_WITH_ARG(n, a) \
854   bool __benchmark_ ## n ## a =     \
855     (new ::testing::Benchmark(#n, NewPermanentCallback(&n)))->ThreadRange(1, NumCPUs());
856 
857 #define DO24(A, B) \
858   A(B, 1);    A(B, 2);    A(B, 3);    A(B, 4);    A(B, 5);    A(B, 6);  \
859   A(B, 7);    A(B, 8);    A(B, 9);    A(B, 10);   A(B, 11);   A(B, 12); \
860   A(B, 13);   A(B, 14);   A(B, 15);   A(B, 16);   A(B, 17);   A(B, 18); \
861   A(B, 19);   A(B, 20);   A(B, 21);   A(B, 22);   A(B, 23);   A(B, 24);
862 
863 DO24(MY_BENCHMARK_WITH_ARG, CacheFillPCRE)
864 DO24(MY_BENCHMARK_WITH_ARG, CacheFillNFA)
865 DO24(MY_BENCHMARK_WITH_ARG, CacheFillRE2)
866 DO24(MY_BENCHMARK_WITH_ARG, CacheFillDFA)
867 
868 #undef DO24
869 #undef MY_BENCHMARK_WITH_ARG
870 */
871 
872 ////////////////////////////////////////////////////////////////////////
873 //
874 // Implementation routines.  Sad that there are so many,
875 // but all the interfaces are slightly different.
876 
877 // Runs implementation to search for regexp in text, iters times.
878 // Expect_match says whether the regexp should be found.
879 // Anchored says whether to run an anchored search.
880 
SearchDFA(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)881 void SearchDFA(int iters, const char* regexp, const StringPiece& text,
882             Prog::Anchor anchor, bool expect_match) {
883   for (int i = 0; i < iters; i++) {
884     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
885     CHECK(re);
886     Prog* prog = re->CompileToProg(0);
887     CHECK(prog);
888     bool failed = false;
889     CHECK_EQ(prog->SearchDFA(text, StringPiece(), anchor, Prog::kFirstMatch,
890                              NULL, &failed, NULL),
891              expect_match);
892     CHECK(!failed);
893     delete prog;
894     re->Decref();
895   }
896 }
897 
SearchNFA(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)898 void SearchNFA(int iters, const char* regexp, const StringPiece& text,
899             Prog::Anchor anchor, bool expect_match) {
900   for (int i = 0; i < iters; i++) {
901     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
902     CHECK(re);
903     Prog* prog = re->CompileToProg(0);
904     CHECK(prog);
905     CHECK_EQ(prog->SearchNFA(text, StringPiece(), anchor, Prog::kFirstMatch,
906                              NULL, 0),
907              expect_match);
908     delete prog;
909     re->Decref();
910   }
911 }
912 
SearchOnePass(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)913 void SearchOnePass(int iters, const char* regexp, const StringPiece& text,
914             Prog::Anchor anchor, bool expect_match) {
915   for (int i = 0; i < iters; i++) {
916     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
917     CHECK(re);
918     Prog* prog = re->CompileToProg(0);
919     CHECK(prog);
920     CHECK(prog->IsOnePass());
921     CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
922              expect_match);
923     delete prog;
924     re->Decref();
925   }
926 }
927 
SearchBitState(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)928 void SearchBitState(int iters, const char* regexp, const StringPiece& text,
929             Prog::Anchor anchor, bool expect_match) {
930   for (int i = 0; i < iters; i++) {
931     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
932     CHECK(re);
933     Prog* prog = re->CompileToProg(0);
934     CHECK(prog);
935     CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
936              expect_match);
937     delete prog;
938     re->Decref();
939   }
940 }
941 
SearchPCRE(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)942 void SearchPCRE(int iters, const char* regexp, const StringPiece& text,
943                 Prog::Anchor anchor, bool expect_match) {
944   for (int i = 0; i < iters; i++) {
945     PCRE re(regexp, PCRE::UTF8);
946     CHECK_EQ(re.error(), "");
947     if (anchor == Prog::kAnchored)
948       CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
949     else
950       CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
951   }
952 }
953 
SearchRE2(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)954 void SearchRE2(int iters, const char* regexp, const StringPiece& text,
955                Prog::Anchor anchor, bool expect_match) {
956   for (int i = 0; i < iters; i++) {
957     RE2 re(regexp);
958     CHECK_EQ(re.error(), "");
959     if (anchor == Prog::kAnchored)
960       CHECK_EQ(RE2::FullMatch(text, re), expect_match);
961     else
962       CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
963   }
964 }
965 
966 // SearchCachedXXX is like SearchXXX but only does the
967 // regexp parsing and compiling once.  This lets us measure
968 // search time without the per-regexp overhead.
969 
SearchCachedDFA(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)970 void SearchCachedDFA(int iters, const char* regexp, const StringPiece& text,
971                      Prog::Anchor anchor, bool expect_match) {
972   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
973   CHECK(re);
974   Prog* prog = re->CompileToProg(1LL<<31);
975   CHECK(prog);
976   for (int i = 0; i < iters; i++) {
977     bool failed = false;
978     CHECK_EQ(prog->SearchDFA(text, StringPiece(), anchor, Prog::kFirstMatch,
979                              NULL, &failed, NULL),
980              expect_match);
981     CHECK(!failed);
982   }
983   delete prog;
984   re->Decref();
985 }
986 
SearchCachedNFA(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)987 void SearchCachedNFA(int iters, const char* regexp, const StringPiece& text,
988                      Prog::Anchor anchor, bool expect_match) {
989   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
990   CHECK(re);
991   Prog* prog = re->CompileToProg(0);
992   CHECK(prog);
993   for (int i = 0; i < iters; i++) {
994     CHECK_EQ(prog->SearchNFA(text, StringPiece(), anchor, Prog::kFirstMatch,
995                              NULL, 0),
996              expect_match);
997   }
998   delete prog;
999   re->Decref();
1000 }
1001 
SearchCachedOnePass(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1002 void SearchCachedOnePass(int iters, const char* regexp, const StringPiece& text,
1003                      Prog::Anchor anchor, bool expect_match) {
1004   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1005   CHECK(re);
1006   Prog* prog = re->CompileToProg(0);
1007   CHECK(prog);
1008   CHECK(prog->IsOnePass());
1009   for (int i = 0; i < iters; i++)
1010     CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
1011              expect_match);
1012   delete prog;
1013   re->Decref();
1014 }
1015 
SearchCachedBitState(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1016 void SearchCachedBitState(int iters, const char* regexp, const StringPiece& text,
1017                      Prog::Anchor anchor, bool expect_match) {
1018   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1019   CHECK(re);
1020   Prog* prog = re->CompileToProg(0);
1021   CHECK(prog);
1022   for (int i = 0; i < iters; i++)
1023     CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
1024              expect_match);
1025   delete prog;
1026   re->Decref();
1027 }
1028 
SearchCachedPCRE(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1029 void SearchCachedPCRE(int iters, const char* regexp, const StringPiece& text,
1030                      Prog::Anchor anchor, bool expect_match) {
1031   PCRE re(regexp, PCRE::UTF8);
1032   CHECK_EQ(re.error(), "");
1033   for (int i = 0; i < iters; i++) {
1034     if (anchor == Prog::kAnchored)
1035       CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
1036     else
1037       CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
1038   }
1039 }
1040 
SearchCachedRE2(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1041 void SearchCachedRE2(int iters, const char* regexp, const StringPiece& text,
1042                      Prog::Anchor anchor, bool expect_match) {
1043   RE2 re(regexp);
1044   CHECK_EQ(re.error(), "");
1045   for (int i = 0; i < iters; i++) {
1046     if (anchor == Prog::kAnchored)
1047       CHECK_EQ(RE2::FullMatch(text, re), expect_match);
1048     else
1049       CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
1050   }
1051 }
1052 
1053 
1054 // Runs implementation to full match regexp against text,
1055 // extracting three submatches.  Expects match always.
1056 
Parse3NFA(int iters,const char * regexp,const StringPiece & text)1057 void Parse3NFA(int iters, const char* regexp, const StringPiece& text) {
1058   for (int i = 0; i < iters; i++) {
1059     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1060     CHECK(re);
1061     Prog* prog = re->CompileToProg(0);
1062     CHECK(prog);
1063     StringPiece sp[4];  // 4 because sp[0] is whole match.
1064     CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1065                           Prog::kFullMatch, sp, 4));
1066     delete prog;
1067     re->Decref();
1068   }
1069 }
1070 
Parse3OnePass(int iters,const char * regexp,const StringPiece & text)1071 void Parse3OnePass(int iters, const char* regexp, const StringPiece& text) {
1072   for (int i = 0; i < iters; i++) {
1073     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1074     CHECK(re);
1075     Prog* prog = re->CompileToProg(0);
1076     CHECK(prog);
1077     CHECK(prog->IsOnePass());
1078     StringPiece sp[4];  // 4 because sp[0] is whole match.
1079     CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1080     delete prog;
1081     re->Decref();
1082   }
1083 }
1084 
Parse3BitState(int iters,const char * regexp,const StringPiece & text)1085 void Parse3BitState(int iters, const char* regexp, const StringPiece& text) {
1086   for (int i = 0; i < iters; i++) {
1087     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1088     CHECK(re);
1089     Prog* prog = re->CompileToProg(0);
1090     CHECK(prog);
1091     StringPiece sp[4];  // 4 because sp[0] is whole match.
1092     CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1093     delete prog;
1094     re->Decref();
1095   }
1096 }
1097 
Parse3Backtrack(int iters,const char * regexp,const StringPiece & text)1098 void Parse3Backtrack(int iters, const char* regexp, const StringPiece& text) {
1099   for (int i = 0; i < iters; i++) {
1100     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1101     CHECK(re);
1102     Prog* prog = re->CompileToProg(0);
1103     CHECK(prog);
1104     StringPiece sp[4];  // 4 because sp[0] is whole match.
1105     CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1106     delete prog;
1107     re->Decref();
1108   }
1109 }
1110 
Parse3PCRE(int iters,const char * regexp,const StringPiece & text)1111 void Parse3PCRE(int iters, const char* regexp, const StringPiece& text) {
1112   for (int i = 0; i < iters; i++) {
1113     PCRE re(regexp, PCRE::UTF8);
1114     CHECK_EQ(re.error(), "");
1115     StringPiece sp1, sp2, sp3;
1116     CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
1117   }
1118 }
1119 
Parse3RE2(int iters,const char * regexp,const StringPiece & text)1120 void Parse3RE2(int iters, const char* regexp, const StringPiece& text) {
1121   for (int i = 0; i < iters; i++) {
1122     RE2 re(regexp);
1123     CHECK_EQ(re.error(), "");
1124     StringPiece sp1, sp2, sp3;
1125     CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
1126   }
1127 }
1128 
Parse3CachedNFA(int iters,const char * regexp,const StringPiece & text)1129 void Parse3CachedNFA(int iters, const char* regexp, const StringPiece& text) {
1130   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1131   CHECK(re);
1132   Prog* prog = re->CompileToProg(0);
1133   CHECK(prog);
1134   StringPiece sp[4];  // 4 because sp[0] is whole match.
1135   for (int i = 0; i < iters; i++) {
1136     CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1137                           Prog::kFullMatch, sp, 4));
1138   }
1139   delete prog;
1140   re->Decref();
1141 }
1142 
Parse3CachedOnePass(int iters,const char * regexp,const StringPiece & text)1143 void Parse3CachedOnePass(int iters, const char* regexp, const StringPiece& text) {
1144   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1145   CHECK(re);
1146   Prog* prog = re->CompileToProg(0);
1147   CHECK(prog);
1148   CHECK(prog->IsOnePass());
1149   StringPiece sp[4];  // 4 because sp[0] is whole match.
1150   for (int i = 0; i < iters; i++)
1151     CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1152   delete prog;
1153   re->Decref();
1154 }
1155 
Parse3CachedBitState(int iters,const char * regexp,const StringPiece & text)1156 void Parse3CachedBitState(int iters, const char* regexp, const StringPiece& text) {
1157   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1158   CHECK(re);
1159   Prog* prog = re->CompileToProg(0);
1160   CHECK(prog);
1161   StringPiece sp[4];  // 4 because sp[0] is whole match.
1162   for (int i = 0; i < iters; i++)
1163     CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1164   delete prog;
1165   re->Decref();
1166 }
1167 
Parse3CachedBacktrack(int iters,const char * regexp,const StringPiece & text)1168 void Parse3CachedBacktrack(int iters, const char* regexp, const StringPiece& text) {
1169   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1170   CHECK(re);
1171   Prog* prog = re->CompileToProg(0);
1172   CHECK(prog);
1173   StringPiece sp[4];  // 4 because sp[0] is whole match.
1174   for (int i = 0; i < iters; i++)
1175     CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1176   delete prog;
1177   re->Decref();
1178 }
1179 
Parse3CachedPCRE(int iters,const char * regexp,const StringPiece & text)1180 void Parse3CachedPCRE(int iters, const char* regexp, const StringPiece& text) {
1181   PCRE re(regexp, PCRE::UTF8);
1182   CHECK_EQ(re.error(), "");
1183   StringPiece sp1, sp2, sp3;
1184   for (int i = 0; i < iters; i++) {
1185     CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
1186   }
1187 }
1188 
Parse3CachedRE2(int iters,const char * regexp,const StringPiece & text)1189 void Parse3CachedRE2(int iters, const char* regexp, const StringPiece& text) {
1190   RE2 re(regexp);
1191   CHECK_EQ(re.error(), "");
1192   StringPiece sp1, sp2, sp3;
1193   for (int i = 0; i < iters; i++) {
1194     CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
1195   }
1196 }
1197 
1198 
1199 // Runs implementation to full match regexp against text,
1200 // extracting three submatches.  Expects match always.
1201 
Parse1NFA(int iters,const char * regexp,const StringPiece & text)1202 void Parse1NFA(int iters, const char* regexp, const StringPiece& text) {
1203   for (int i = 0; i < iters; i++) {
1204     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1205     CHECK(re);
1206     Prog* prog = re->CompileToProg(0);
1207     CHECK(prog);
1208     StringPiece sp[2];  // 2 because sp[0] is whole match.
1209     CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1210                           Prog::kFullMatch, sp, 2));
1211     delete prog;
1212     re->Decref();
1213   }
1214 }
1215 
Parse1OnePass(int iters,const char * regexp,const StringPiece & text)1216 void Parse1OnePass(int iters, const char* regexp, const StringPiece& text) {
1217   for (int i = 0; i < iters; i++) {
1218     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1219     CHECK(re);
1220     Prog* prog = re->CompileToProg(0);
1221     CHECK(prog);
1222     CHECK(prog->IsOnePass());
1223     StringPiece sp[2];  // 2 because sp[0] is whole match.
1224     CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1225     delete prog;
1226     re->Decref();
1227   }
1228 }
1229 
Parse1BitState(int iters,const char * regexp,const StringPiece & text)1230 void Parse1BitState(int iters, const char* regexp, const StringPiece& text) {
1231   for (int i = 0; i < iters; i++) {
1232     Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1233     CHECK(re);
1234     Prog* prog = re->CompileToProg(0);
1235     CHECK(prog);
1236     StringPiece sp[2];  // 2 because sp[0] is whole match.
1237     CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1238     delete prog;
1239     re->Decref();
1240   }
1241 }
1242 
Parse1PCRE(int iters,const char * regexp,const StringPiece & text)1243 void Parse1PCRE(int iters, const char* regexp, const StringPiece& text) {
1244   for (int i = 0; i < iters; i++) {
1245     PCRE re(regexp, PCRE::UTF8);
1246     CHECK_EQ(re.error(), "");
1247     StringPiece sp1;
1248     CHECK(PCRE::FullMatch(text, re, &sp1));
1249   }
1250 }
1251 
Parse1RE2(int iters,const char * regexp,const StringPiece & text)1252 void Parse1RE2(int iters, const char* regexp, const StringPiece& text) {
1253   for (int i = 0; i < iters; i++) {
1254     RE2 re(regexp);
1255     CHECK_EQ(re.error(), "");
1256     StringPiece sp1;
1257     CHECK(RE2::FullMatch(text, re, &sp1));
1258   }
1259 }
1260 
Parse1CachedNFA(int iters,const char * regexp,const StringPiece & text)1261 void Parse1CachedNFA(int iters, const char* regexp, const StringPiece& text) {
1262   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1263   CHECK(re);
1264   Prog* prog = re->CompileToProg(0);
1265   CHECK(prog);
1266   StringPiece sp[2];  // 2 because sp[0] is whole match.
1267   for (int i = 0; i < iters; i++) {
1268     CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1269                           Prog::kFullMatch, sp, 2));
1270   }
1271   delete prog;
1272   re->Decref();
1273 }
1274 
Parse1CachedOnePass(int iters,const char * regexp,const StringPiece & text)1275 void Parse1CachedOnePass(int iters, const char* regexp, const StringPiece& text) {
1276   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1277   CHECK(re);
1278   Prog* prog = re->CompileToProg(0);
1279   CHECK(prog);
1280   CHECK(prog->IsOnePass());
1281   StringPiece sp[2];  // 2 because sp[0] is whole match.
1282   for (int i = 0; i < iters; i++)
1283     CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1284   delete prog;
1285   re->Decref();
1286 }
1287 
Parse1CachedBitState(int iters,const char * regexp,const StringPiece & text)1288 void Parse1CachedBitState(int iters, const char* regexp, const StringPiece& text) {
1289   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1290   CHECK(re);
1291   Prog* prog = re->CompileToProg(0);
1292   CHECK(prog);
1293   StringPiece sp[2];  // 2 because sp[0] is whole match.
1294   for (int i = 0; i < iters; i++)
1295     CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1296   delete prog;
1297   re->Decref();
1298 }
1299 
Parse1CachedBacktrack(int iters,const char * regexp,const StringPiece & text)1300 void Parse1CachedBacktrack(int iters, const char* regexp, const StringPiece& text) {
1301   Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1302   CHECK(re);
1303   Prog* prog = re->CompileToProg(0);
1304   CHECK(prog);
1305   StringPiece sp[2];  // 2 because sp[0] is whole match.
1306   for (int i = 0; i < iters; i++)
1307     CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1308   delete prog;
1309   re->Decref();
1310 }
1311 
Parse1CachedPCRE(int iters,const char * regexp,const StringPiece & text)1312 void Parse1CachedPCRE(int iters, const char* regexp, const StringPiece& text) {
1313   PCRE re(regexp, PCRE::UTF8);
1314   CHECK_EQ(re.error(), "");
1315   StringPiece sp1;
1316   for (int i = 0; i < iters; i++) {
1317     CHECK(PCRE::FullMatch(text, re, &sp1));
1318   }
1319 }
1320 
Parse1CachedRE2(int iters,const char * regexp,const StringPiece & text)1321 void Parse1CachedRE2(int iters, const char* regexp, const StringPiece& text) {
1322   RE2 re(regexp);
1323   CHECK_EQ(re.error(), "");
1324   StringPiece sp1;
1325   for (int i = 0; i < iters; i++) {
1326     CHECK(RE2::FullMatch(text, re, &sp1));
1327   }
1328 }
1329 
SearchParse2CachedPCRE(int iters,const char * regexp,const StringPiece & text)1330 void SearchParse2CachedPCRE(int iters, const char* regexp,
1331                             const StringPiece& text) {
1332   PCRE re(regexp, PCRE::UTF8);
1333   CHECK_EQ(re.error(), "");
1334   for (int i = 0; i < iters; i++) {
1335     StringPiece sp1, sp2;
1336     CHECK(PCRE::PartialMatch(text, re, &sp1, &sp2));
1337   }
1338 }
1339 
SearchParse2CachedRE2(int iters,const char * regexp,const StringPiece & text)1340 void SearchParse2CachedRE2(int iters, const char* regexp,
1341                            const StringPiece& text) {
1342   RE2 re(regexp);
1343   CHECK_EQ(re.error(), "");
1344   for (int i = 0; i < iters; i++) {
1345     StringPiece sp1, sp2;
1346     CHECK(RE2::PartialMatch(text, re, &sp1, &sp2));
1347   }
1348 }
1349 
SearchParse1CachedPCRE(int iters,const char * regexp,const StringPiece & text)1350 void SearchParse1CachedPCRE(int iters, const char* regexp,
1351                             const StringPiece& text) {
1352   PCRE re(regexp, PCRE::UTF8);
1353   CHECK_EQ(re.error(), "");
1354   for (int i = 0; i < iters; i++) {
1355     StringPiece sp1;
1356     CHECK(PCRE::PartialMatch(text, re, &sp1));
1357   }
1358 }
1359 
SearchParse1CachedRE2(int iters,const char * regexp,const StringPiece & text)1360 void SearchParse1CachedRE2(int iters, const char* regexp,
1361                            const StringPiece& text) {
1362   RE2 re(regexp);
1363   CHECK_EQ(re.error(), "");
1364   for (int i = 0; i < iters; i++) {
1365     StringPiece sp1;
1366     CHECK(RE2::PartialMatch(text, re, &sp1));
1367   }
1368 }
1369 
EmptyPartialMatchPCRE(int n)1370 void EmptyPartialMatchPCRE(int n) {
1371   PCRE re("");
1372   for (int i = 0; i < n; i++) {
1373     PCRE::PartialMatch("", re);
1374   }
1375 }
1376 
EmptyPartialMatchRE2(int n)1377 void EmptyPartialMatchRE2(int n) {
1378   RE2 re("");
1379   for (int i = 0; i < n; i++) {
1380     RE2::PartialMatch("", re);
1381   }
1382 }
1383 #ifdef USEPCRE
1384 BENCHMARK(EmptyPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1385 #endif
1386 BENCHMARK(EmptyPartialMatchRE2)->ThreadRange(1, NumCPUs());
1387 
SimplePartialMatchPCRE(int n)1388 void SimplePartialMatchPCRE(int n) {
1389   PCRE re("abcdefg");
1390   for (int i = 0; i < n; i++) {
1391     PCRE::PartialMatch("abcdefg", re);
1392   }
1393 }
1394 
SimplePartialMatchRE2(int n)1395 void SimplePartialMatchRE2(int n) {
1396   RE2 re("abcdefg");
1397   for (int i = 0; i < n; i++) {
1398     RE2::PartialMatch("abcdefg", re);
1399   }
1400 }
1401 #ifdef USEPCRE
1402 BENCHMARK(SimplePartialMatchPCRE)->ThreadRange(1, NumCPUs());
1403 #endif
1404 BENCHMARK(SimplePartialMatchRE2)->ThreadRange(1, NumCPUs());
1405 
1406 static string http_text =
1407   "GET /asdfhjasdhfasdlfhasdflkjasdfkljasdhflaskdjhf"
1408   "alksdjfhasdlkfhasdlkjfhasdljkfhadsjklf HTTP/1.1";
1409 
HTTPPartialMatchPCRE(int n)1410 void HTTPPartialMatchPCRE(int n) {
1411   StringPiece a;
1412   PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1413   for (int i = 0; i < n; i++) {
1414     PCRE::PartialMatch(http_text, re, &a);
1415   }
1416 }
1417 
HTTPPartialMatchRE2(int n)1418 void HTTPPartialMatchRE2(int n) {
1419   StringPiece a;
1420   RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1421   for (int i = 0; i < n; i++) {
1422     RE2::PartialMatch(http_text, re, &a);
1423   }
1424 }
1425 
1426 #ifdef USEPCRE
1427 BENCHMARK(HTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1428 #endif
1429 BENCHMARK(HTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
1430 
1431 static string smallhttp_text =
1432   "GET /abc HTTP/1.1";
1433 
SmallHTTPPartialMatchPCRE(int n)1434 void SmallHTTPPartialMatchPCRE(int n) {
1435   StringPiece a;
1436   PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1437   for (int i = 0; i < n; i++) {
1438     PCRE::PartialMatch(smallhttp_text, re, &a);
1439   }
1440 }
1441 
SmallHTTPPartialMatchRE2(int n)1442 void SmallHTTPPartialMatchRE2(int n) {
1443   StringPiece a;
1444   RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1445   for (int i = 0; i < n; i++) {
1446     RE2::PartialMatch(smallhttp_text, re, &a);
1447   }
1448 }
1449 
1450 #ifdef USEPCRE
1451 BENCHMARK(SmallHTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1452 #endif
1453 BENCHMARK(SmallHTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
1454 
DotMatchPCRE(int n)1455 void DotMatchPCRE(int n) {
1456   StringPiece a;
1457   PCRE re("(?-s)^(.+)");
1458   for (int i = 0; i < n; i++) {
1459     PCRE::PartialMatch(http_text, re, &a);
1460   }
1461 }
1462 
DotMatchRE2(int n)1463 void DotMatchRE2(int n) {
1464   StringPiece a;
1465   RE2 re("(?-s)^(.+)");
1466   for (int i = 0; i < n; i++) {
1467     RE2::PartialMatch(http_text, re, &a);
1468   }
1469 }
1470 
1471 #ifdef USEPCRE
1472 BENCHMARK(DotMatchPCRE)->ThreadRange(1, NumCPUs());
1473 #endif
1474 BENCHMARK(DotMatchRE2)->ThreadRange(1, NumCPUs());
1475 
ASCIIMatchPCRE(int n)1476 void ASCIIMatchPCRE(int n) {
1477   StringPiece a;
1478   PCRE re("(?-s)^([ -~]+)");
1479   for (int i = 0; i < n; i++) {
1480     PCRE::PartialMatch(http_text, re, &a);
1481   }
1482 }
1483 
ASCIIMatchRE2(int n)1484 void ASCIIMatchRE2(int n) {
1485   StringPiece a;
1486   RE2 re("(?-s)^([ -~]+)");
1487   for (int i = 0; i < n; i++) {
1488     RE2::PartialMatch(http_text, re, &a);
1489   }
1490 }
1491 
1492 #ifdef USEPCRE
1493 BENCHMARK(ASCIIMatchPCRE)->ThreadRange(1, NumCPUs());
1494 #endif
1495 BENCHMARK(ASCIIMatchRE2)->ThreadRange(1, NumCPUs());
1496 
FullMatchPCRE(int iter,int n,const char * regexp)1497 void FullMatchPCRE(int iter, int n, const char *regexp) {
1498   StopBenchmarkTiming();
1499   string s;
1500   MakeText(&s, n);
1501   s += "ABCDEFGHIJ";
1502   BenchmarkMemoryUsage();
1503   PCRE re(regexp);
1504   StartBenchmarkTiming();
1505   for (int i = 0; i < iter; i++)
1506     CHECK(PCRE::FullMatch(s, re));
1507   SetBenchmarkBytesProcessed(static_cast<int64_t>(iter)*n);
1508 }
1509 
FullMatchRE2(int iter,int n,const char * regexp)1510 void FullMatchRE2(int iter, int n, const char *regexp) {
1511   StopBenchmarkTiming();
1512   string s;
1513   MakeText(&s, n);
1514   s += "ABCDEFGHIJ";
1515   BenchmarkMemoryUsage();
1516   RE2 re(regexp, RE2::Latin1);
1517   StartBenchmarkTiming();
1518   for (int i = 0; i < iter; i++)
1519     CHECK(RE2::FullMatch(s, re));
1520   SetBenchmarkBytesProcessed(static_cast<int64_t>(iter)*n);
1521 }
1522 
FullMatch_DotStar_CachedPCRE(int i,int n)1523 void FullMatch_DotStar_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s).*"); }
FullMatch_DotStar_CachedRE2(int i,int n)1524 void FullMatch_DotStar_CachedRE2(int i, int n)  { FullMatchRE2(i, n, "(?s).*"); }
1525 
FullMatch_DotStarDollar_CachedPCRE(int i,int n)1526 void FullMatch_DotStarDollar_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s).*$"); }
FullMatch_DotStarDollar_CachedRE2(int i,int n)1527 void FullMatch_DotStarDollar_CachedRE2(int i, int n)  { FullMatchRE2(i, n, "(?s).*$"); }
1528 
FullMatch_DotStarCapture_CachedPCRE(int i,int n)1529 void FullMatch_DotStarCapture_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s)((.*)()()($))"); }
FullMatch_DotStarCapture_CachedRE2(int i,int n)1530 void FullMatch_DotStarCapture_CachedRE2(int i, int n)  { FullMatchRE2(i, n, "(?s)((.*)()()($))"); }
1531 
1532 #ifdef USEPCRE
1533 BENCHMARK_RANGE(FullMatch_DotStar_CachedPCRE, 8, 2<<20);
1534 #endif
1535 BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2,  8, 2<<20);
1536 
1537 #ifdef USEPCRE
1538 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedPCRE, 8, 2<<20);
1539 #endif
1540 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2,  8, 2<<20);
1541 
1542 #ifdef USEPCRE
1543 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedPCRE, 8, 2<<20);
1544 #endif
1545 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2,  8, 2<<20);
1546 
PossibleMatchRangeCommon(int iter,const char * regexp)1547 void PossibleMatchRangeCommon(int iter, const char* regexp) {
1548   StopBenchmarkTiming();
1549   RE2 re(regexp);
1550   StartBenchmarkTiming();
1551   string min;
1552   string max;
1553   const int kMaxLen = 16;
1554   for (int i = 0; i < iter; i++) {
1555     CHECK(re.PossibleMatchRange(&min, &max, kMaxLen));
1556   }
1557 }
1558 
PossibleMatchRange_Trivial(int i)1559 void PossibleMatchRange_Trivial(int i) {
1560   PossibleMatchRangeCommon(i, ".*");
1561 }
PossibleMatchRange_Complex(int i)1562 void PossibleMatchRange_Complex(int i) {
1563   PossibleMatchRangeCommon(i, "^abc[def]?[gh]{1,2}.*");
1564 }
PossibleMatchRange_Prefix(int i)1565 void PossibleMatchRange_Prefix(int i) {
1566   PossibleMatchRangeCommon(i, "^some_random_prefix.*");
1567 }
PossibleMatchRange_NoProg(int i)1568 void PossibleMatchRange_NoProg(int i) {
1569   PossibleMatchRangeCommon(i, "^some_random_string$");
1570 }
1571 
1572 BENCHMARK(PossibleMatchRange_Trivial);
1573 BENCHMARK(PossibleMatchRange_Complex);
1574 BENCHMARK(PossibleMatchRange_Prefix);
1575 BENCHMARK(PossibleMatchRange_NoProg);
1576 
1577 }  // namespace re2
1578