1 // Copyright 2006-2008 The RE2 Authors. All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // Benchmarks for regular expression implementations.
6
7 #include <stdint.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string>
11 #include <utility>
12
13 #include "util/test.h"
14 #include "util/logging.h"
15 #include "util/strutil.h"
16 #include "re2/prog.h"
17 #include "re2/re2.h"
18 #include "re2/regexp.h"
19 #include "util/pcre.h"
20 #include "util/benchmark.h"
21
22 namespace re2 {
23 void Test();
24 void MemoryUsage();
25 } // namespace re2
26
27 typedef testing::MallocCounter MallocCounter;
28
29 namespace re2 {
30
Test()31 void Test() {
32 Regexp* re = Regexp::Parse("(\\d+)-(\\d+)-(\\d+)", Regexp::LikePerl, NULL);
33 CHECK(re);
34 Prog* prog = re->CompileToProg(0);
35 CHECK(prog);
36 CHECK(prog->IsOnePass());
37 const char* text = "650-253-0001";
38 StringPiece sp[4];
39 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
40 CHECK_EQ(sp[0], "650-253-0001");
41 CHECK_EQ(sp[1], "650");
42 CHECK_EQ(sp[2], "253");
43 CHECK_EQ(sp[3], "0001");
44 delete prog;
45 re->Decref();
46 LOG(INFO) << "test passed\n";
47 }
48
MemoryUsage()49 void MemoryUsage() {
50 const char* regexp = "(\\d+)-(\\d+)-(\\d+)";
51 const char* text = "650-253-0001";
52 {
53 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
54 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
55 CHECK(re);
56 // Can't pass mc.HeapGrowth() and mc.PeakHeapGrowth() to LOG(INFO) directly,
57 // because LOG(INFO) might do a big allocation before they get evaluated.
58 fprintf(stderr, "Regexp: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
59 mc.Reset();
60
61 Prog* prog = re->CompileToProg(0);
62 CHECK(prog);
63 CHECK(prog->IsOnePass());
64 fprintf(stderr, "Prog: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
65 mc.Reset();
66
67 StringPiece sp[4];
68 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
69 fprintf(stderr, "Search: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
70 delete prog;
71 re->Decref();
72 }
73
74 {
75 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
76
77 PCRE re(regexp, PCRE::UTF8);
78 fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
79 PCRE::FullMatch(text, re);
80 fprintf(stderr, "RE: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
81 }
82
83 {
84 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
85
86 PCRE* re = new PCRE(regexp, PCRE::UTF8);
87 fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
88 PCRE::FullMatch(text, *re);
89 fprintf(stderr, "PCRE*: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
90 delete re;
91 }
92
93 {
94 MallocCounter mc(MallocCounter::THIS_THREAD_ONLY);
95
96 RE2 re(regexp);
97 fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
98 RE2::FullMatch(text, re);
99 fprintf(stderr, "RE2: %7lld bytes (peak=%lld)\n", mc.HeapGrowth(), mc.PeakHeapGrowth());
100 }
101
102 fprintf(stderr, "sizeof: PCRE=%zd RE2=%zd Prog=%zd Inst=%zd\n",
103 sizeof(PCRE), sizeof(RE2), sizeof(Prog), sizeof(Prog::Inst));
104 }
105
106 // Regular expression implementation wrappers.
107 // Defined at bottom of file, but they are repetitive
108 // and not interesting.
109
110 typedef void SearchImpl(int iters, const char* regexp, const StringPiece& text,
111 Prog::Anchor anchor, bool expect_match);
112
113 SearchImpl SearchDFA, SearchNFA, SearchOnePass, SearchBitState,
114 SearchPCRE, SearchRE2,
115 SearchCachedDFA, SearchCachedNFA, SearchCachedOnePass, SearchCachedBitState,
116 SearchCachedPCRE, SearchCachedRE2;
117
118 typedef void ParseImpl(int iters, const char* regexp, const StringPiece& text);
119
120 ParseImpl Parse1NFA, Parse1OnePass, Parse1BitState,
121 Parse1PCRE, Parse1RE2,
122 Parse1Backtrack,
123 Parse1CachedNFA, Parse1CachedOnePass, Parse1CachedBitState,
124 Parse1CachedPCRE, Parse1CachedRE2,
125 Parse1CachedBacktrack;
126
127 ParseImpl Parse3NFA, Parse3OnePass, Parse3BitState,
128 Parse3PCRE, Parse3RE2,
129 Parse3Backtrack,
130 Parse3CachedNFA, Parse3CachedOnePass, Parse3CachedBitState,
131 Parse3CachedPCRE, Parse3CachedRE2,
132 Parse3CachedBacktrack;
133
134 ParseImpl SearchParse2CachedPCRE, SearchParse2CachedRE2;
135
136 ParseImpl SearchParse1CachedPCRE, SearchParse1CachedRE2;
137
138 // Benchmark: failed search for regexp in random text.
139
140 // Generate random text that won't contain the search string,
141 // to test worst-case search behavior.
MakeText(string * text,int nbytes)142 void MakeText(string* text, int nbytes) {
143 srand(1);
144 text->resize(nbytes);
145 for (int i = 0; i < nbytes; i++) {
146 // Generate a one-byte rune that isn't a control character (e.g. '\n').
147 // Clipping to 0x20 introduces some bias, but we don't need uniformity.
148 int byte = rand() & 0x7F;
149 if (byte < 0x20)
150 byte = 0x20;
151 (*text)[i] = byte;
152 }
153 }
154
155 // Makes text of size nbytes, then calls run to search
156 // the text for regexp iters times.
Search(int iters,int nbytes,const char * regexp,SearchImpl * search)157 void Search(int iters, int nbytes, const char* regexp, SearchImpl* search) {
158 StopBenchmarkTiming();
159 string s;
160 MakeText(&s, nbytes);
161 BenchmarkMemoryUsage();
162 StartBenchmarkTiming();
163 search(iters, regexp, s, Prog::kUnanchored, false);
164 SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*nbytes);
165 }
166
167 // These two are easy because they start with an A,
168 // giving the search loop something to memchr for.
169 #define EASY0 "ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
170 #define EASY1 "A[AB]B[BC]C[CD]D[DE]E[EF]F[FG]G[GH]H[HI]I[IJ]J$"
171
172 // This is a little harder, since it starts with a character class
173 // and thus can't be memchr'ed. Could look for ABC and work backward,
174 // but no one does that.
175 #define MEDIUM "[XYZ]ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
176
177 // This is a fair amount harder, because of the leading [ -~]*.
178 // A bad backtracking implementation will take O(text^2) time to
179 // figure out there's no match.
180 #define HARD "[ -~]*ABCDEFGHIJKLMNOPQRSTUVWXYZ$"
181
182 // This has quite a high degree of fanout.
183 // NFA execution will be particularly slow.
184 #define FANOUT "(?:[\\x{80}-\\x{10FFFF}]?){100}[\\x{80}-\\x{10FFFF}]"
185
186 // This stresses engines that are trying to track parentheses.
187 #define PARENS "([ -~])*(A)(B)(C)(D)(E)(F)(G)(H)(I)(J)(K)(L)(M)" \
188 "(N)(O)(P)(Q)(R)(S)(T)(U)(V)(W)(X)(Y)(Z)$"
189
Search_Easy0_CachedDFA(int i,int n)190 void Search_Easy0_CachedDFA(int i, int n) { Search(i, n, EASY0, SearchCachedDFA); }
Search_Easy0_CachedNFA(int i,int n)191 void Search_Easy0_CachedNFA(int i, int n) { Search(i, n, EASY0, SearchCachedNFA); }
Search_Easy0_CachedPCRE(int i,int n)192 void Search_Easy0_CachedPCRE(int i, int n) { Search(i, n, EASY0, SearchCachedPCRE); }
Search_Easy0_CachedRE2(int i,int n)193 void Search_Easy0_CachedRE2(int i, int n) { Search(i, n, EASY0, SearchCachedRE2); }
194
195 BENCHMARK_RANGE(Search_Easy0_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
196 BENCHMARK_RANGE(Search_Easy0_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
197 #ifdef USEPCRE
198 BENCHMARK_RANGE(Search_Easy0_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
199 #endif
200 BENCHMARK_RANGE(Search_Easy0_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
201
Search_Easy1_CachedDFA(int i,int n)202 void Search_Easy1_CachedDFA(int i, int n) { Search(i, n, EASY1, SearchCachedDFA); }
Search_Easy1_CachedNFA(int i,int n)203 void Search_Easy1_CachedNFA(int i, int n) { Search(i, n, EASY1, SearchCachedNFA); }
Search_Easy1_CachedPCRE(int i,int n)204 void Search_Easy1_CachedPCRE(int i, int n) { Search(i, n, EASY1, SearchCachedPCRE); }
Search_Easy1_CachedRE2(int i,int n)205 void Search_Easy1_CachedRE2(int i, int n) { Search(i, n, EASY1, SearchCachedRE2); }
206
207 BENCHMARK_RANGE(Search_Easy1_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
208 BENCHMARK_RANGE(Search_Easy1_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
209 #ifdef USEPCRE
210 BENCHMARK_RANGE(Search_Easy1_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
211 #endif
212 BENCHMARK_RANGE(Search_Easy1_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
213
Search_Medium_CachedDFA(int i,int n)214 void Search_Medium_CachedDFA(int i, int n) { Search(i, n, MEDIUM, SearchCachedDFA); }
Search_Medium_CachedNFA(int i,int n)215 void Search_Medium_CachedNFA(int i, int n) { Search(i, n, MEDIUM, SearchCachedNFA); }
Search_Medium_CachedPCRE(int i,int n)216 void Search_Medium_CachedPCRE(int i, int n) { Search(i, n, MEDIUM, SearchCachedPCRE); }
Search_Medium_CachedRE2(int i,int n)217 void Search_Medium_CachedRE2(int i, int n) { Search(i, n, MEDIUM, SearchCachedRE2); }
218
219 BENCHMARK_RANGE(Search_Medium_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
220 BENCHMARK_RANGE(Search_Medium_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
221 #ifdef USEPCRE
222 BENCHMARK_RANGE(Search_Medium_CachedPCRE, 8, 256<<10)->ThreadRange(1, NumCPUs());
223 #endif
224 BENCHMARK_RANGE(Search_Medium_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
225
Search_Hard_CachedDFA(int i,int n)226 void Search_Hard_CachedDFA(int i, int n) { Search(i, n, HARD, SearchCachedDFA); }
Search_Hard_CachedNFA(int i,int n)227 void Search_Hard_CachedNFA(int i, int n) { Search(i, n, HARD, SearchCachedNFA); }
Search_Hard_CachedPCRE(int i,int n)228 void Search_Hard_CachedPCRE(int i, int n) { Search(i, n, HARD, SearchCachedPCRE); }
Search_Hard_CachedRE2(int i,int n)229 void Search_Hard_CachedRE2(int i, int n) { Search(i, n, HARD, SearchCachedRE2); }
230
231 BENCHMARK_RANGE(Search_Hard_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
232 BENCHMARK_RANGE(Search_Hard_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
233 #ifdef USEPCRE
234 BENCHMARK_RANGE(Search_Hard_CachedPCRE, 8, 4<<10)->ThreadRange(1, NumCPUs());
235 #endif
236 BENCHMARK_RANGE(Search_Hard_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
237
Search_Fanout_CachedDFA(int i,int n)238 void Search_Fanout_CachedDFA(int i, int n) { Search(i, n, FANOUT, SearchCachedDFA); }
Search_Fanout_CachedNFA(int i,int n)239 void Search_Fanout_CachedNFA(int i, int n) { Search(i, n, FANOUT, SearchCachedNFA); }
Search_Fanout_CachedPCRE(int i,int n)240 void Search_Fanout_CachedPCRE(int i, int n) { Search(i, n, FANOUT, SearchCachedPCRE); }
Search_Fanout_CachedRE2(int i,int n)241 void Search_Fanout_CachedRE2(int i, int n) { Search(i, n, FANOUT, SearchCachedRE2); }
242
243 BENCHMARK_RANGE(Search_Fanout_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
244 BENCHMARK_RANGE(Search_Fanout_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
245 #ifdef USEPCRE
246 BENCHMARK_RANGE(Search_Fanout_CachedPCRE, 8, 4<<10)->ThreadRange(1, NumCPUs());
247 #endif
248 BENCHMARK_RANGE(Search_Fanout_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
249
Search_Parens_CachedDFA(int i,int n)250 void Search_Parens_CachedDFA(int i, int n) { Search(i, n, PARENS, SearchCachedDFA); }
Search_Parens_CachedNFA(int i,int n)251 void Search_Parens_CachedNFA(int i, int n) { Search(i, n, PARENS, SearchCachedNFA); }
Search_Parens_CachedPCRE(int i,int n)252 void Search_Parens_CachedPCRE(int i, int n) { Search(i, n, PARENS, SearchCachedPCRE); }
Search_Parens_CachedRE2(int i,int n)253 void Search_Parens_CachedRE2(int i, int n) { Search(i, n, PARENS, SearchCachedRE2); }
254
255 BENCHMARK_RANGE(Search_Parens_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
256 BENCHMARK_RANGE(Search_Parens_CachedNFA, 8, 256<<10)->ThreadRange(1, NumCPUs());
257 #ifdef USEPCRE
258 BENCHMARK_RANGE(Search_Parens_CachedPCRE, 8, 8)->ThreadRange(1, NumCPUs());
259 #endif
260 BENCHMARK_RANGE(Search_Parens_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
261
SearchBigFixed(int iters,int nbytes,SearchImpl * search)262 void SearchBigFixed(int iters, int nbytes, SearchImpl* search) {
263 StopBenchmarkTiming();
264 string s;
265 s.append(nbytes/2, 'x');
266 string regexp = "^" + s + ".*$";
267 string t;
268 MakeText(&t, nbytes/2);
269 s += t;
270 BenchmarkMemoryUsage();
271 StartBenchmarkTiming();
272 search(iters, regexp.c_str(), s, Prog::kUnanchored, true);
273 SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*nbytes);
274 }
275
Search_BigFixed_CachedDFA(int i,int n)276 void Search_BigFixed_CachedDFA(int i, int n) { SearchBigFixed(i, n, SearchCachedDFA); }
Search_BigFixed_CachedNFA(int i,int n)277 void Search_BigFixed_CachedNFA(int i, int n) { SearchBigFixed(i, n, SearchCachedNFA); }
Search_BigFixed_CachedPCRE(int i,int n)278 void Search_BigFixed_CachedPCRE(int i, int n) { SearchBigFixed(i, n, SearchCachedPCRE); }
Search_BigFixed_CachedRE2(int i,int n)279 void Search_BigFixed_CachedRE2(int i, int n) { SearchBigFixed(i, n, SearchCachedRE2); }
280
281 BENCHMARK_RANGE(Search_BigFixed_CachedDFA, 8, 1<<20)->ThreadRange(1, NumCPUs());
282 BENCHMARK_RANGE(Search_BigFixed_CachedNFA, 8, 32<<10)->ThreadRange(1, NumCPUs());
283 #ifdef USEPCRE
284 BENCHMARK_RANGE(Search_BigFixed_CachedPCRE, 8, 32<<10)->ThreadRange(1, NumCPUs());
285 #endif
286 BENCHMARK_RANGE(Search_BigFixed_CachedRE2, 8, 1<<20)->ThreadRange(1, NumCPUs());
287
288 // Benchmark: FindAndConsume
289
FindAndConsume(int iters,int nbytes)290 void FindAndConsume(int iters, int nbytes) {
291 StopBenchmarkTiming();
292 string s;
293 MakeText(&s, nbytes);
294 s.append("Hello World");
295 StartBenchmarkTiming();
296 RE2 re("((Hello World))");
297 for (int i = 0; i < iters; i++) {
298 StringPiece t = s;
299 StringPiece u;
300 CHECK(RE2::FindAndConsume(&t, re, &u));
301 CHECK_EQ(u, "Hello World");
302 }
303 SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*nbytes);
304 }
305
306 BENCHMARK_RANGE(FindAndConsume, 8, 16<<20)->ThreadRange(1, NumCPUs());
307
308 // Benchmark: successful anchored search.
309
SearchSuccess(int iters,int nbytes,const char * regexp,SearchImpl * search)310 void SearchSuccess(int iters, int nbytes, const char* regexp, SearchImpl* search) {
311 StopBenchmarkTiming();
312 string s;
313 MakeText(&s, nbytes);
314 BenchmarkMemoryUsage();
315 StartBenchmarkTiming();
316 search(iters, regexp, s, Prog::kAnchored, true);
317 SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*nbytes);
318 }
319
320 // Unambiguous search (RE2 can use OnePass).
321
Search_Success_DFA(int i,int n)322 void Search_Success_DFA(int i, int n) { SearchSuccess(i, n, ".*$", SearchDFA); }
Search_Success_NFA(int i,int n)323 void Search_Success_NFA(int i, int n) { SearchSuccess(i, n, ".*$", SearchNFA); }
Search_Success_PCRE(int i,int n)324 void Search_Success_PCRE(int i, int n) { SearchSuccess(i, n, ".*$", SearchPCRE); }
Search_Success_RE2(int i,int n)325 void Search_Success_RE2(int i, int n) { SearchSuccess(i, n, ".*$", SearchRE2); }
Search_Success_OnePass(int i,int n)326 void Search_Success_OnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchOnePass); }
327
328 BENCHMARK_RANGE(Search_Success_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
329 BENCHMARK_RANGE(Search_Success_NFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
330 #ifdef USEPCRE
331 BENCHMARK_RANGE(Search_Success_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
332 #endif
333 BENCHMARK_RANGE(Search_Success_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
334 BENCHMARK_RANGE(Search_Success_OnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
335
Search_Success_CachedDFA(int i,int n)336 void Search_Success_CachedDFA(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedDFA); }
Search_Success_CachedNFA(int i,int n)337 void Search_Success_CachedNFA(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedNFA); }
Search_Success_CachedPCRE(int i,int n)338 void Search_Success_CachedPCRE(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedPCRE); }
Search_Success_CachedRE2(int i,int n)339 void Search_Success_CachedRE2(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedRE2); }
Search_Success_CachedOnePass(int i,int n)340 void Search_Success_CachedOnePass(int i, int n) { SearchSuccess(i, n, ".*$", SearchCachedOnePass); }
341
342 BENCHMARK_RANGE(Search_Success_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
343 BENCHMARK_RANGE(Search_Success_CachedNFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
344 #ifdef USEPCRE
345 BENCHMARK_RANGE(Search_Success_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
346 #endif
347 BENCHMARK_RANGE(Search_Success_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
348 BENCHMARK_RANGE(Search_Success_CachedOnePass, 8, 2<<20)->ThreadRange(1, NumCPUs());
349
350 // Ambiguous search (RE2 cannot use OnePass).
351 // Used to be ".*.$", but that is coalesced to ".+$" these days.
352
Search_Success1_DFA(int i,int n)353 void Search_Success1_DFA(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchDFA); }
Search_Success1_NFA(int i,int n)354 void Search_Success1_NFA(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchNFA); }
Search_Success1_PCRE(int i,int n)355 void Search_Success1_PCRE(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchPCRE); }
Search_Success1_RE2(int i,int n)356 void Search_Success1_RE2(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchRE2); }
Search_Success1_BitState(int i,int n)357 void Search_Success1_BitState(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchBitState); }
358
359 BENCHMARK_RANGE(Search_Success1_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
360 BENCHMARK_RANGE(Search_Success1_NFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
361 #ifdef USEPCRE
362 BENCHMARK_RANGE(Search_Success1_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
363 #endif
364 BENCHMARK_RANGE(Search_Success1_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
365 BENCHMARK_RANGE(Search_Success1_BitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
366
Search_Success1_CachedDFA(int i,int n)367 void Search_Success1_CachedDFA(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchCachedDFA); }
Search_Success1_CachedNFA(int i,int n)368 void Search_Success1_CachedNFA(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchCachedNFA); }
Search_Success1_CachedPCRE(int i,int n)369 void Search_Success1_CachedPCRE(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchCachedPCRE); }
Search_Success1_CachedRE2(int i,int n)370 void Search_Success1_CachedRE2(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchCachedRE2); }
Search_Success1_CachedBitState(int i,int n)371 void Search_Success1_CachedBitState(int i, int n) { SearchSuccess(i, n, ".*\\C$", SearchCachedBitState); }
372
373 BENCHMARK_RANGE(Search_Success1_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
374 BENCHMARK_RANGE(Search_Success1_CachedNFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
375 #ifdef USEPCRE
376 BENCHMARK_RANGE(Search_Success1_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
377 #endif
378 BENCHMARK_RANGE(Search_Success1_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
379 BENCHMARK_RANGE(Search_Success1_CachedBitState, 8, 2<<20)->ThreadRange(1, NumCPUs());
380
381 // Benchmark: AltMatch optimisation (just to verify that it works)
382 // Note that OnePass doesn't implement it!
383
SearchAltMatch(int iters,int nbytes,SearchImpl * search)384 void SearchAltMatch(int iters, int nbytes, SearchImpl* search) {
385 StopBenchmarkTiming();
386 string s;
387 MakeText(&s, nbytes);
388 BenchmarkMemoryUsage();
389 StartBenchmarkTiming();
390 search(iters, "\\C*", s, Prog::kAnchored, true);
391 SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*nbytes);
392 }
393
Search_AltMatch_DFA(int i,int n)394 void Search_AltMatch_DFA(int i, int n) { SearchAltMatch(i, n, SearchDFA); }
Search_AltMatch_NFA(int i,int n)395 void Search_AltMatch_NFA(int i, int n) { SearchAltMatch(i, n, SearchNFA); }
Search_AltMatch_OnePass(int i,int n)396 void Search_AltMatch_OnePass(int i, int n) { SearchAltMatch(i, n, SearchOnePass); }
Search_AltMatch_BitState(int i,int n)397 void Search_AltMatch_BitState(int i, int n) { SearchAltMatch(i, n, SearchBitState); }
Search_AltMatch_PCRE(int i,int n)398 void Search_AltMatch_PCRE(int i, int n) { SearchAltMatch(i, n, SearchPCRE); }
Search_AltMatch_RE2(int i,int n)399 void Search_AltMatch_RE2(int i, int n) { SearchAltMatch(i, n, SearchRE2); }
400
401 BENCHMARK_RANGE(Search_AltMatch_DFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
402 BENCHMARK_RANGE(Search_AltMatch_NFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
403 BENCHMARK_RANGE(Search_AltMatch_OnePass, 8, 16<<20)->ThreadRange(1, NumCPUs());
404 BENCHMARK_RANGE(Search_AltMatch_BitState, 8, 16<<20)->ThreadRange(1, NumCPUs());
405 #ifdef USEPCRE
406 BENCHMARK_RANGE(Search_AltMatch_PCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
407 #endif
408 BENCHMARK_RANGE(Search_AltMatch_RE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
409
Search_AltMatch_CachedDFA(int i,int n)410 void Search_AltMatch_CachedDFA(int i, int n) { SearchAltMatch(i, n, SearchCachedDFA); }
Search_AltMatch_CachedNFA(int i,int n)411 void Search_AltMatch_CachedNFA(int i, int n) { SearchAltMatch(i, n, SearchCachedNFA); }
Search_AltMatch_CachedOnePass(int i,int n)412 void Search_AltMatch_CachedOnePass(int i, int n) { SearchAltMatch(i, n, SearchCachedOnePass); }
Search_AltMatch_CachedBitState(int i,int n)413 void Search_AltMatch_CachedBitState(int i, int n) { SearchAltMatch(i, n, SearchCachedBitState); }
Search_AltMatch_CachedPCRE(int i,int n)414 void Search_AltMatch_CachedPCRE(int i, int n) { SearchAltMatch(i, n, SearchCachedPCRE); }
Search_AltMatch_CachedRE2(int i,int n)415 void Search_AltMatch_CachedRE2(int i, int n) { SearchAltMatch(i, n, SearchCachedRE2); }
416
417 BENCHMARK_RANGE(Search_AltMatch_CachedDFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
418 BENCHMARK_RANGE(Search_AltMatch_CachedNFA, 8, 16<<20)->ThreadRange(1, NumCPUs());
419 BENCHMARK_RANGE(Search_AltMatch_CachedOnePass, 8, 16<<20)->ThreadRange(1, NumCPUs());
420 BENCHMARK_RANGE(Search_AltMatch_CachedBitState, 8, 16<<20)->ThreadRange(1, NumCPUs());
421 #ifdef USEPCRE
422 BENCHMARK_RANGE(Search_AltMatch_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
423 #endif
424 BENCHMARK_RANGE(Search_AltMatch_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
425
426 // Benchmark: use regexp to find phone number.
427
SearchDigits(int iters,SearchImpl * search)428 void SearchDigits(int iters, SearchImpl* search) {
429 StringPiece s("650-253-0001");
430 BenchmarkMemoryUsage();
431 search(iters, "([0-9]+)-([0-9]+)-([0-9]+)", s, Prog::kAnchored, true);
432 SetBenchmarkItemsProcessed(iters);
433 }
434
Search_Digits_DFA(int i)435 void Search_Digits_DFA(int i) { SearchDigits(i, SearchDFA); }
Search_Digits_NFA(int i)436 void Search_Digits_NFA(int i) { SearchDigits(i, SearchNFA); }
Search_Digits_OnePass(int i)437 void Search_Digits_OnePass(int i) { SearchDigits(i, SearchOnePass); }
Search_Digits_PCRE(int i)438 void Search_Digits_PCRE(int i) { SearchDigits(i, SearchPCRE); }
Search_Digits_RE2(int i)439 void Search_Digits_RE2(int i) { SearchDigits(i, SearchRE2); }
Search_Digits_BitState(int i)440 void Search_Digits_BitState(int i) { SearchDigits(i, SearchBitState); }
441
442 BENCHMARK(Search_Digits_DFA)->ThreadRange(1, NumCPUs());
443 BENCHMARK(Search_Digits_NFA)->ThreadRange(1, NumCPUs());
444 BENCHMARK(Search_Digits_OnePass)->ThreadRange(1, NumCPUs());
445 #ifdef USEPCRE
446 BENCHMARK(Search_Digits_PCRE)->ThreadRange(1, NumCPUs());
447 #endif
448 BENCHMARK(Search_Digits_RE2)->ThreadRange(1, NumCPUs());
449 BENCHMARK(Search_Digits_BitState)->ThreadRange(1, NumCPUs());
450
451 // Benchmark: use regexp to parse digit fields in phone number.
452
Parse3Digits(int iters,void (* parse3)(int,const char *,const StringPiece &))453 void Parse3Digits(int iters,
454 void (*parse3)(int, const char*, const StringPiece&)) {
455 BenchmarkMemoryUsage();
456 parse3(iters, "([0-9]+)-([0-9]+)-([0-9]+)", "650-253-0001");
457 SetBenchmarkItemsProcessed(iters);
458 }
459
Parse_Digits_NFA(int i)460 void Parse_Digits_NFA(int i) { Parse3Digits(i, Parse3NFA); }
Parse_Digits_OnePass(int i)461 void Parse_Digits_OnePass(int i) { Parse3Digits(i, Parse3OnePass); }
Parse_Digits_PCRE(int i)462 void Parse_Digits_PCRE(int i) { Parse3Digits(i, Parse3PCRE); }
Parse_Digits_RE2(int i)463 void Parse_Digits_RE2(int i) { Parse3Digits(i, Parse3RE2); }
Parse_Digits_Backtrack(int i)464 void Parse_Digits_Backtrack(int i) { Parse3Digits(i, Parse3Backtrack); }
Parse_Digits_BitState(int i)465 void Parse_Digits_BitState(int i) { Parse3Digits(i, Parse3BitState); }
466
467 BENCHMARK(Parse_Digits_NFA)->ThreadRange(1, NumCPUs());
468 BENCHMARK(Parse_Digits_OnePass)->ThreadRange(1, NumCPUs());
469 #ifdef USEPCRE
470 BENCHMARK(Parse_Digits_PCRE)->ThreadRange(1, NumCPUs());
471 #endif
472 BENCHMARK(Parse_Digits_RE2)->ThreadRange(1, NumCPUs());
473 BENCHMARK(Parse_Digits_Backtrack)->ThreadRange(1, NumCPUs());
474 BENCHMARK(Parse_Digits_BitState)->ThreadRange(1, NumCPUs());
475
Parse_CachedDigits_NFA(int i)476 void Parse_CachedDigits_NFA(int i) { Parse3Digits(i, Parse3CachedNFA); }
Parse_CachedDigits_OnePass(int i)477 void Parse_CachedDigits_OnePass(int i) { Parse3Digits(i, Parse3CachedOnePass); }
Parse_CachedDigits_PCRE(int i)478 void Parse_CachedDigits_PCRE(int i) { Parse3Digits(i, Parse3CachedPCRE); }
Parse_CachedDigits_RE2(int i)479 void Parse_CachedDigits_RE2(int i) { Parse3Digits(i, Parse3CachedRE2); }
Parse_CachedDigits_Backtrack(int i)480 void Parse_CachedDigits_Backtrack(int i) { Parse3Digits(i, Parse3CachedBacktrack); }
Parse_CachedDigits_BitState(int i)481 void Parse_CachedDigits_BitState(int i) { Parse3Digits(i, Parse3CachedBitState); }
482
483 BENCHMARK(Parse_CachedDigits_NFA)->ThreadRange(1, NumCPUs());
484 BENCHMARK(Parse_CachedDigits_OnePass)->ThreadRange(1, NumCPUs());
485 #ifdef USEPCRE
486 BENCHMARK(Parse_CachedDigits_PCRE)->ThreadRange(1, NumCPUs());
487 #endif
488 BENCHMARK(Parse_CachedDigits_Backtrack)->ThreadRange(1, NumCPUs());
489 BENCHMARK(Parse_CachedDigits_RE2)->ThreadRange(1, NumCPUs());
490 BENCHMARK(Parse_CachedDigits_BitState)->ThreadRange(1, NumCPUs());
491
Parse3DigitDs(int iters,void (* parse3)(int,const char *,const StringPiece &))492 void Parse3DigitDs(int iters,
493 void (*parse3)(int, const char*, const StringPiece&)) {
494 BenchmarkMemoryUsage();
495 parse3(iters, "(\\d+)-(\\d+)-(\\d+)", "650-253-0001");
496 SetBenchmarkItemsProcessed(iters);
497 }
498
Parse_DigitDs_NFA(int i)499 void Parse_DigitDs_NFA(int i) { Parse3DigitDs(i, Parse3NFA); }
Parse_DigitDs_OnePass(int i)500 void Parse_DigitDs_OnePass(int i) { Parse3DigitDs(i, Parse3OnePass); }
Parse_DigitDs_PCRE(int i)501 void Parse_DigitDs_PCRE(int i) { Parse3DigitDs(i, Parse3PCRE); }
Parse_DigitDs_RE2(int i)502 void Parse_DigitDs_RE2(int i) { Parse3DigitDs(i, Parse3RE2); }
Parse_DigitDs_Backtrack(int i)503 void Parse_DigitDs_Backtrack(int i) { Parse3DigitDs(i, Parse3CachedBacktrack); }
Parse_DigitDs_BitState(int i)504 void Parse_DigitDs_BitState(int i) { Parse3DigitDs(i, Parse3CachedBitState); }
505
506 BENCHMARK(Parse_DigitDs_NFA)->ThreadRange(1, NumCPUs());
507 BENCHMARK(Parse_DigitDs_OnePass)->ThreadRange(1, NumCPUs());
508 #ifdef USEPCRE
509 BENCHMARK(Parse_DigitDs_PCRE)->ThreadRange(1, NumCPUs());
510 #endif
511 BENCHMARK(Parse_DigitDs_RE2)->ThreadRange(1, NumCPUs());
512 BENCHMARK(Parse_DigitDs_Backtrack)->ThreadRange(1, NumCPUs());
513 BENCHMARK(Parse_DigitDs_BitState)->ThreadRange(1, NumCPUs());
514
Parse_CachedDigitDs_NFA(int i)515 void Parse_CachedDigitDs_NFA(int i) { Parse3DigitDs(i, Parse3CachedNFA); }
Parse_CachedDigitDs_OnePass(int i)516 void Parse_CachedDigitDs_OnePass(int i) { Parse3DigitDs(i, Parse3CachedOnePass); }
Parse_CachedDigitDs_PCRE(int i)517 void Parse_CachedDigitDs_PCRE(int i) { Parse3DigitDs(i, Parse3CachedPCRE); }
Parse_CachedDigitDs_RE2(int i)518 void Parse_CachedDigitDs_RE2(int i) { Parse3DigitDs(i, Parse3CachedRE2); }
Parse_CachedDigitDs_Backtrack(int i)519 void Parse_CachedDigitDs_Backtrack(int i) { Parse3DigitDs(i, Parse3CachedBacktrack); }
Parse_CachedDigitDs_BitState(int i)520 void Parse_CachedDigitDs_BitState(int i) { Parse3DigitDs(i, Parse3CachedBitState); }
521
522 BENCHMARK(Parse_CachedDigitDs_NFA)->ThreadRange(1, NumCPUs());
523 BENCHMARK(Parse_CachedDigitDs_OnePass)->ThreadRange(1, NumCPUs());
524 #ifdef USEPCRE
525 BENCHMARK(Parse_CachedDigitDs_PCRE)->ThreadRange(1, NumCPUs());
526 #endif
527 BENCHMARK(Parse_CachedDigitDs_Backtrack)->ThreadRange(1, NumCPUs());
528 BENCHMARK(Parse_CachedDigitDs_RE2)->ThreadRange(1, NumCPUs());
529 BENCHMARK(Parse_CachedDigitDs_BitState)->ThreadRange(1, NumCPUs());
530
531 // Benchmark: splitting off leading number field.
532
Parse1Split(int iters,void (* parse1)(int,const char *,const StringPiece &))533 void Parse1Split(int iters,
534 void (*parse1)(int, const char*, const StringPiece&)) {
535 BenchmarkMemoryUsage();
536 parse1(iters, "[0-9]+-(.*)", "650-253-0001");
537 SetBenchmarkItemsProcessed(iters);
538 }
539
Parse_Split_NFA(int i)540 void Parse_Split_NFA(int i) { Parse1Split(i, Parse1NFA); }
Parse_Split_OnePass(int i)541 void Parse_Split_OnePass(int i) { Parse1Split(i, Parse1OnePass); }
Parse_Split_PCRE(int i)542 void Parse_Split_PCRE(int i) { Parse1Split(i, Parse1PCRE); }
Parse_Split_RE2(int i)543 void Parse_Split_RE2(int i) { Parse1Split(i, Parse1RE2); }
Parse_Split_BitState(int i)544 void Parse_Split_BitState(int i) { Parse1Split(i, Parse1BitState); }
545
546 BENCHMARK(Parse_Split_NFA)->ThreadRange(1, NumCPUs());
547 BENCHMARK(Parse_Split_OnePass)->ThreadRange(1, NumCPUs());
548 #ifdef USEPCRE
549 BENCHMARK(Parse_Split_PCRE)->ThreadRange(1, NumCPUs());
550 #endif
551 BENCHMARK(Parse_Split_RE2)->ThreadRange(1, NumCPUs());
552 BENCHMARK(Parse_Split_BitState)->ThreadRange(1, NumCPUs());
553
Parse_CachedSplit_NFA(int i)554 void Parse_CachedSplit_NFA(int i) { Parse1Split(i, Parse1CachedNFA); }
Parse_CachedSplit_OnePass(int i)555 void Parse_CachedSplit_OnePass(int i) { Parse1Split(i, Parse1CachedOnePass); }
Parse_CachedSplit_PCRE(int i)556 void Parse_CachedSplit_PCRE(int i) { Parse1Split(i, Parse1CachedPCRE); }
Parse_CachedSplit_RE2(int i)557 void Parse_CachedSplit_RE2(int i) { Parse1Split(i, Parse1CachedRE2); }
Parse_CachedSplit_BitState(int i)558 void Parse_CachedSplit_BitState(int i) { Parse1Split(i, Parse1CachedBitState); }
559
560 BENCHMARK(Parse_CachedSplit_NFA)->ThreadRange(1, NumCPUs());
561 BENCHMARK(Parse_CachedSplit_OnePass)->ThreadRange(1, NumCPUs());
562 #ifdef USEPCRE
563 BENCHMARK(Parse_CachedSplit_PCRE)->ThreadRange(1, NumCPUs());
564 #endif
565 BENCHMARK(Parse_CachedSplit_RE2)->ThreadRange(1, NumCPUs());
566 BENCHMARK(Parse_CachedSplit_BitState)->ThreadRange(1, NumCPUs());
567
568 // Benchmark: splitting off leading number field but harder (ambiguous regexp).
569
Parse1SplitHard(int iters,void (* run)(int,const char *,const StringPiece &))570 void Parse1SplitHard(int iters,
571 void (*run)(int, const char*, const StringPiece&)) {
572 BenchmarkMemoryUsage();
573 run(iters, "[0-9]+.(.*)", "650-253-0001");
574 SetBenchmarkItemsProcessed(iters);
575 }
576
Parse_SplitHard_NFA(int i)577 void Parse_SplitHard_NFA(int i) { Parse1SplitHard(i, Parse1NFA); }
Parse_SplitHard_PCRE(int i)578 void Parse_SplitHard_PCRE(int i) { Parse1SplitHard(i, Parse1PCRE); }
Parse_SplitHard_RE2(int i)579 void Parse_SplitHard_RE2(int i) { Parse1SplitHard(i, Parse1RE2); }
Parse_SplitHard_BitState(int i)580 void Parse_SplitHard_BitState(int i) { Parse1SplitHard(i, Parse1BitState); }
581
582 #ifdef USEPCRE
583 BENCHMARK(Parse_SplitHard_PCRE)->ThreadRange(1, NumCPUs());
584 #endif
585 BENCHMARK(Parse_SplitHard_RE2)->ThreadRange(1, NumCPUs());
586 BENCHMARK(Parse_SplitHard_BitState)->ThreadRange(1, NumCPUs());
587 BENCHMARK(Parse_SplitHard_NFA)->ThreadRange(1, NumCPUs());
588
Parse_CachedSplitHard_NFA(int i)589 void Parse_CachedSplitHard_NFA(int i) { Parse1SplitHard(i, Parse1CachedNFA); }
Parse_CachedSplitHard_PCRE(int i)590 void Parse_CachedSplitHard_PCRE(int i) { Parse1SplitHard(i, Parse1CachedPCRE); }
Parse_CachedSplitHard_RE2(int i)591 void Parse_CachedSplitHard_RE2(int i) { Parse1SplitHard(i, Parse1CachedRE2); }
Parse_CachedSplitHard_BitState(int i)592 void Parse_CachedSplitHard_BitState(int i) { Parse1SplitHard(i, Parse1CachedBitState); }
Parse_CachedSplitHard_Backtrack(int i)593 void Parse_CachedSplitHard_Backtrack(int i) { Parse1SplitHard(i, Parse1CachedBacktrack); }
594
595 #ifdef USEPCRE
596 BENCHMARK(Parse_CachedSplitHard_PCRE)->ThreadRange(1, NumCPUs());
597 #endif
598 BENCHMARK(Parse_CachedSplitHard_RE2)->ThreadRange(1, NumCPUs());
599 BENCHMARK(Parse_CachedSplitHard_BitState)->ThreadRange(1, NumCPUs());
600 BENCHMARK(Parse_CachedSplitHard_NFA)->ThreadRange(1, NumCPUs());
601 BENCHMARK(Parse_CachedSplitHard_Backtrack)->ThreadRange(1, NumCPUs());
602
603 // Benchmark: Parse1SplitHard, big text, small match.
604
Parse1SplitBig1(int iters,void (* run)(int,const char *,const StringPiece &))605 void Parse1SplitBig1(int iters,
606 void (*run)(int, const char*, const StringPiece&)) {
607 string s;
608 s.append(100000, 'x');
609 s.append("650-253-0001");
610 BenchmarkMemoryUsage();
611 run(iters, "[0-9]+.(.*)", s);
612 SetBenchmarkItemsProcessed(iters);
613 }
614
Parse_CachedSplitBig1_PCRE(int i)615 void Parse_CachedSplitBig1_PCRE(int i) { Parse1SplitBig1(i, SearchParse1CachedPCRE); }
Parse_CachedSplitBig1_RE2(int i)616 void Parse_CachedSplitBig1_RE2(int i) { Parse1SplitBig1(i, SearchParse1CachedRE2); }
617
618 #ifdef USEPCRE
619 BENCHMARK(Parse_CachedSplitBig1_PCRE)->ThreadRange(1, NumCPUs());
620 #endif
621 BENCHMARK(Parse_CachedSplitBig1_RE2)->ThreadRange(1, NumCPUs());
622
623 // Benchmark: Parse1SplitHard, big text, big match.
624
Parse1SplitBig2(int iters,void (* run)(int,const char *,const StringPiece &))625 void Parse1SplitBig2(int iters,
626 void (*run)(int, const char*, const StringPiece&)) {
627 string s;
628 s.append("650-253-");
629 s.append(100000, '0');
630 BenchmarkMemoryUsage();
631 run(iters, "[0-9]+.(.*)", s);
632 SetBenchmarkItemsProcessed(iters);
633 }
634
Parse_CachedSplitBig2_PCRE(int i)635 void Parse_CachedSplitBig2_PCRE(int i) { Parse1SplitBig2(i, SearchParse1CachedPCRE); }
Parse_CachedSplitBig2_RE2(int i)636 void Parse_CachedSplitBig2_RE2(int i) { Parse1SplitBig2(i, SearchParse1CachedRE2); }
637
638 #ifdef USEPCRE
639 BENCHMARK(Parse_CachedSplitBig2_PCRE)->ThreadRange(1, NumCPUs());
640 #endif
641 BENCHMARK(Parse_CachedSplitBig2_RE2)->ThreadRange(1, NumCPUs());
642
643 // Benchmark: measure time required to parse (but not execute)
644 // a simple regular expression.
645
ParseRegexp(int iters,const string & regexp)646 void ParseRegexp(int iters, const string& regexp) {
647 for (int i = 0; i < iters; i++) {
648 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
649 CHECK(re);
650 re->Decref();
651 }
652 }
653
SimplifyRegexp(int iters,const string & regexp)654 void SimplifyRegexp(int iters, const string& regexp) {
655 for (int i = 0; i < iters; i++) {
656 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
657 CHECK(re);
658 Regexp* sre = re->Simplify();
659 CHECK(sre);
660 sre->Decref();
661 re->Decref();
662 }
663 }
664
NullWalkRegexp(int iters,const string & regexp)665 void NullWalkRegexp(int iters, const string& regexp) {
666 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
667 CHECK(re);
668 for (int i = 0; i < iters; i++) {
669 re->NullWalk();
670 }
671 re->Decref();
672 }
673
SimplifyCompileRegexp(int iters,const string & regexp)674 void SimplifyCompileRegexp(int iters, const string& regexp) {
675 for (int i = 0; i < iters; i++) {
676 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
677 CHECK(re);
678 Regexp* sre = re->Simplify();
679 CHECK(sre);
680 Prog* prog = sre->CompileToProg(0);
681 CHECK(prog);
682 delete prog;
683 sre->Decref();
684 re->Decref();
685 }
686 }
687
CompileRegexp(int iters,const string & regexp)688 void CompileRegexp(int iters, const string& regexp) {
689 for (int i = 0; i < iters; i++) {
690 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
691 CHECK(re);
692 Prog* prog = re->CompileToProg(0);
693 CHECK(prog);
694 delete prog;
695 re->Decref();
696 }
697 }
698
CompileToProg(int iters,const string & regexp)699 void CompileToProg(int iters, const string& regexp) {
700 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
701 CHECK(re);
702 for (int i = 0; i < iters; i++) {
703 Prog* prog = re->CompileToProg(0);
704 CHECK(prog);
705 delete prog;
706 }
707 re->Decref();
708 }
709
CompileByteMap(int iters,const string & regexp)710 void CompileByteMap(int iters, const string& regexp) {
711 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
712 CHECK(re);
713 Prog* prog = re->CompileToProg(0);
714 CHECK(prog);
715 for (int i = 0; i < iters; i++) {
716 prog->ComputeByteMap();
717 }
718 delete prog;
719 re->Decref();
720 }
721
CompilePCRE(int iters,const string & regexp)722 void CompilePCRE(int iters, const string& regexp) {
723 for (int i = 0; i < iters; i++) {
724 PCRE re(regexp, PCRE::UTF8);
725 CHECK_EQ(re.error(), "");
726 }
727 }
728
CompileRE2(int iters,const string & regexp)729 void CompileRE2(int iters, const string& regexp) {
730 for (int i = 0; i < iters; i++) {
731 RE2 re(regexp);
732 CHECK_EQ(re.error(), "");
733 }
734 }
735
RunBuild(int iters,const string & regexp,void (* run)(int,const string &))736 void RunBuild(int iters, const string& regexp, void (*run)(int, const string&)) {
737 run(iters, regexp);
738 SetBenchmarkItemsProcessed(iters);
739 }
740
741 } // namespace re2
742
743 DEFINE_string(compile_regexp, "(.*)-(\\d+)-of-(\\d+)", "regexp for compile benchmarks");
744
745 namespace re2 {
746
BM_PCRE_Compile(int i)747 void BM_PCRE_Compile(int i) { RunBuild(i, FLAGS_compile_regexp, CompilePCRE); }
BM_Regexp_Parse(int i)748 void BM_Regexp_Parse(int i) { RunBuild(i, FLAGS_compile_regexp, ParseRegexp); }
BM_Regexp_Simplify(int i)749 void BM_Regexp_Simplify(int i) { RunBuild(i, FLAGS_compile_regexp, SimplifyRegexp); }
BM_CompileToProg(int i)750 void BM_CompileToProg(int i) { RunBuild(i, FLAGS_compile_regexp, CompileToProg); }
BM_CompileByteMap(int i)751 void BM_CompileByteMap(int i) { RunBuild(i, FLAGS_compile_regexp, CompileByteMap); }
BM_Regexp_Compile(int i)752 void BM_Regexp_Compile(int i) { RunBuild(i, FLAGS_compile_regexp, CompileRegexp); }
BM_Regexp_SimplifyCompile(int i)753 void BM_Regexp_SimplifyCompile(int i) { RunBuild(i, FLAGS_compile_regexp, SimplifyCompileRegexp); }
BM_Regexp_NullWalk(int i)754 void BM_Regexp_NullWalk(int i) { RunBuild(i, FLAGS_compile_regexp, NullWalkRegexp); }
BM_RE2_Compile(int i)755 void BM_RE2_Compile(int i) { RunBuild(i, FLAGS_compile_regexp, CompileRE2); }
756
757 #ifdef USEPCRE
758 BENCHMARK(BM_PCRE_Compile)->ThreadRange(1, NumCPUs());
759 #endif
760 BENCHMARK(BM_Regexp_Parse)->ThreadRange(1, NumCPUs());
761 BENCHMARK(BM_Regexp_Simplify)->ThreadRange(1, NumCPUs());
762 BENCHMARK(BM_CompileToProg)->ThreadRange(1, NumCPUs());
763 BENCHMARK(BM_CompileByteMap)->ThreadRange(1, NumCPUs());
764 BENCHMARK(BM_Regexp_Compile)->ThreadRange(1, NumCPUs());
765 BENCHMARK(BM_Regexp_SimplifyCompile)->ThreadRange(1, NumCPUs());
766 BENCHMARK(BM_Regexp_NullWalk)->ThreadRange(1, NumCPUs());
767 BENCHMARK(BM_RE2_Compile)->ThreadRange(1, NumCPUs());
768
769 // Makes text of size nbytes, then calls run to search
770 // the text for regexp iters times.
SearchPhone(int iters,int nbytes,ParseImpl * search)771 void SearchPhone(int iters, int nbytes, ParseImpl* search) {
772 StopBenchmarkTiming();
773 string s;
774 MakeText(&s, nbytes);
775 s.append("(650) 253-0001");
776 BenchmarkMemoryUsage();
777 StartBenchmarkTiming();
778 search(iters, "(\\d{3}-|\\(\\d{3}\\)\\s+)(\\d{3}-\\d{4})", s);
779 SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*nbytes);
780 }
781
SearchPhone_CachedPCRE(int i,int n)782 void SearchPhone_CachedPCRE(int i, int n) {
783 SearchPhone(i, n, SearchParse2CachedPCRE);
784 }
SearchPhone_CachedRE2(int i,int n)785 void SearchPhone_CachedRE2(int i, int n) {
786 SearchPhone(i, n, SearchParse2CachedRE2);
787 }
788
789 #ifdef USEPCRE
790 BENCHMARK_RANGE(SearchPhone_CachedPCRE, 8, 16<<20)->ThreadRange(1, NumCPUs());
791 #endif
792 BENCHMARK_RANGE(SearchPhone_CachedRE2, 8, 16<<20)->ThreadRange(1, NumCPUs());
793
794 /*
795 TODO(rsc): Make this work again.
796
797 // Generates and returns a string over binary alphabet {0,1} that contains
798 // all possible binary sequences of length n as subsequences. The obvious
799 // brute force method would generate a string of length n * 2^n, but this
800 // generates a string of length n + 2^n - 1 called a De Bruijn cycle.
801 // See Knuth, The Art of Computer Programming, Vol 2, Exercise 3.2.2 #17.
802 static string DeBruijnString(int n) {
803 CHECK_LT(n, 8*sizeof(int));
804 CHECK_GT(n, 0);
805
806 std::vector<bool> did(1<<n);
807 for (int i = 0; i < 1<<n; i++)
808 did[i] = false;
809
810 string s;
811 for (int i = 0; i < n-1; i++)
812 s.append("0");
813 int bits = 0;
814 int mask = (1<<n) - 1;
815 for (int i = 0; i < (1<<n); i++) {
816 bits <<= 1;
817 bits &= mask;
818 if (!did[bits|1]) {
819 bits |= 1;
820 s.append("1");
821 } else {
822 s.append("0");
823 }
824 CHECK(!did[bits]);
825 did[bits] = true;
826 }
827 return s;
828 }
829
830 void CacheFill(int iters, int n, SearchImpl *srch) {
831 string s = DeBruijnString(n+1);
832 string t;
833 for (int i = n+1; i < 20; i++) {
834 t = s + s;
835 using std::swap;
836 swap(s, t);
837 }
838 srch(iters, StringPrintf("0[01]{%d}$", n).c_str(), s,
839 Prog::kUnanchored, true);
840 SetBenchmarkBytesProcessed(static_cast<int64_t>(iters)*s.size());
841 }
842
843 void CacheFillPCRE(int i, int n) { CacheFill(i, n, SearchCachedPCRE); }
844 void CacheFillRE2(int i, int n) { CacheFill(i, n, SearchCachedRE2); }
845 void CacheFillNFA(int i, int n) { CacheFill(i, n, SearchCachedNFA); }
846 void CacheFillDFA(int i, int n) { CacheFill(i, n, SearchCachedDFA); }
847
848 // BENCHMARK_WITH_ARG uses __LINE__ to generate distinct identifiers
849 // for the static BenchmarkRegisterer, which makes it unusable inside
850 // a macro like DO24 below. MY_BENCHMARK_WITH_ARG uses the argument a
851 // to make the identifiers distinct (only possible when 'a' is a simple
852 // expression like 2, not like 1+1).
853 #define MY_BENCHMARK_WITH_ARG(n, a) \
854 bool __benchmark_ ## n ## a = \
855 (new ::testing::Benchmark(#n, NewPermanentCallback(&n)))->ThreadRange(1, NumCPUs());
856
857 #define DO24(A, B) \
858 A(B, 1); A(B, 2); A(B, 3); A(B, 4); A(B, 5); A(B, 6); \
859 A(B, 7); A(B, 8); A(B, 9); A(B, 10); A(B, 11); A(B, 12); \
860 A(B, 13); A(B, 14); A(B, 15); A(B, 16); A(B, 17); A(B, 18); \
861 A(B, 19); A(B, 20); A(B, 21); A(B, 22); A(B, 23); A(B, 24);
862
863 DO24(MY_BENCHMARK_WITH_ARG, CacheFillPCRE)
864 DO24(MY_BENCHMARK_WITH_ARG, CacheFillNFA)
865 DO24(MY_BENCHMARK_WITH_ARG, CacheFillRE2)
866 DO24(MY_BENCHMARK_WITH_ARG, CacheFillDFA)
867
868 #undef DO24
869 #undef MY_BENCHMARK_WITH_ARG
870 */
871
872 ////////////////////////////////////////////////////////////////////////
873 //
874 // Implementation routines. Sad that there are so many,
875 // but all the interfaces are slightly different.
876
877 // Runs implementation to search for regexp in text, iters times.
878 // Expect_match says whether the regexp should be found.
879 // Anchored says whether to run an anchored search.
880
SearchDFA(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)881 void SearchDFA(int iters, const char* regexp, const StringPiece& text,
882 Prog::Anchor anchor, bool expect_match) {
883 for (int i = 0; i < iters; i++) {
884 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
885 CHECK(re);
886 Prog* prog = re->CompileToProg(0);
887 CHECK(prog);
888 bool failed = false;
889 CHECK_EQ(prog->SearchDFA(text, StringPiece(), anchor, Prog::kFirstMatch,
890 NULL, &failed, NULL),
891 expect_match);
892 CHECK(!failed);
893 delete prog;
894 re->Decref();
895 }
896 }
897
SearchNFA(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)898 void SearchNFA(int iters, const char* regexp, const StringPiece& text,
899 Prog::Anchor anchor, bool expect_match) {
900 for (int i = 0; i < iters; i++) {
901 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
902 CHECK(re);
903 Prog* prog = re->CompileToProg(0);
904 CHECK(prog);
905 CHECK_EQ(prog->SearchNFA(text, StringPiece(), anchor, Prog::kFirstMatch,
906 NULL, 0),
907 expect_match);
908 delete prog;
909 re->Decref();
910 }
911 }
912
SearchOnePass(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)913 void SearchOnePass(int iters, const char* regexp, const StringPiece& text,
914 Prog::Anchor anchor, bool expect_match) {
915 for (int i = 0; i < iters; i++) {
916 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
917 CHECK(re);
918 Prog* prog = re->CompileToProg(0);
919 CHECK(prog);
920 CHECK(prog->IsOnePass());
921 CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
922 expect_match);
923 delete prog;
924 re->Decref();
925 }
926 }
927
SearchBitState(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)928 void SearchBitState(int iters, const char* regexp, const StringPiece& text,
929 Prog::Anchor anchor, bool expect_match) {
930 for (int i = 0; i < iters; i++) {
931 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
932 CHECK(re);
933 Prog* prog = re->CompileToProg(0);
934 CHECK(prog);
935 CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
936 expect_match);
937 delete prog;
938 re->Decref();
939 }
940 }
941
SearchPCRE(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)942 void SearchPCRE(int iters, const char* regexp, const StringPiece& text,
943 Prog::Anchor anchor, bool expect_match) {
944 for (int i = 0; i < iters; i++) {
945 PCRE re(regexp, PCRE::UTF8);
946 CHECK_EQ(re.error(), "");
947 if (anchor == Prog::kAnchored)
948 CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
949 else
950 CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
951 }
952 }
953
SearchRE2(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)954 void SearchRE2(int iters, const char* regexp, const StringPiece& text,
955 Prog::Anchor anchor, bool expect_match) {
956 for (int i = 0; i < iters; i++) {
957 RE2 re(regexp);
958 CHECK_EQ(re.error(), "");
959 if (anchor == Prog::kAnchored)
960 CHECK_EQ(RE2::FullMatch(text, re), expect_match);
961 else
962 CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
963 }
964 }
965
966 // SearchCachedXXX is like SearchXXX but only does the
967 // regexp parsing and compiling once. This lets us measure
968 // search time without the per-regexp overhead.
969
SearchCachedDFA(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)970 void SearchCachedDFA(int iters, const char* regexp, const StringPiece& text,
971 Prog::Anchor anchor, bool expect_match) {
972 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
973 CHECK(re);
974 Prog* prog = re->CompileToProg(1LL<<31);
975 CHECK(prog);
976 for (int i = 0; i < iters; i++) {
977 bool failed = false;
978 CHECK_EQ(prog->SearchDFA(text, StringPiece(), anchor, Prog::kFirstMatch,
979 NULL, &failed, NULL),
980 expect_match);
981 CHECK(!failed);
982 }
983 delete prog;
984 re->Decref();
985 }
986
SearchCachedNFA(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)987 void SearchCachedNFA(int iters, const char* regexp, const StringPiece& text,
988 Prog::Anchor anchor, bool expect_match) {
989 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
990 CHECK(re);
991 Prog* prog = re->CompileToProg(0);
992 CHECK(prog);
993 for (int i = 0; i < iters; i++) {
994 CHECK_EQ(prog->SearchNFA(text, StringPiece(), anchor, Prog::kFirstMatch,
995 NULL, 0),
996 expect_match);
997 }
998 delete prog;
999 re->Decref();
1000 }
1001
SearchCachedOnePass(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1002 void SearchCachedOnePass(int iters, const char* regexp, const StringPiece& text,
1003 Prog::Anchor anchor, bool expect_match) {
1004 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1005 CHECK(re);
1006 Prog* prog = re->CompileToProg(0);
1007 CHECK(prog);
1008 CHECK(prog->IsOnePass());
1009 for (int i = 0; i < iters; i++)
1010 CHECK_EQ(prog->SearchOnePass(text, text, anchor, Prog::kFirstMatch, NULL, 0),
1011 expect_match);
1012 delete prog;
1013 re->Decref();
1014 }
1015
SearchCachedBitState(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1016 void SearchCachedBitState(int iters, const char* regexp, const StringPiece& text,
1017 Prog::Anchor anchor, bool expect_match) {
1018 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1019 CHECK(re);
1020 Prog* prog = re->CompileToProg(0);
1021 CHECK(prog);
1022 for (int i = 0; i < iters; i++)
1023 CHECK_EQ(prog->SearchBitState(text, text, anchor, Prog::kFirstMatch, NULL, 0),
1024 expect_match);
1025 delete prog;
1026 re->Decref();
1027 }
1028
SearchCachedPCRE(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1029 void SearchCachedPCRE(int iters, const char* regexp, const StringPiece& text,
1030 Prog::Anchor anchor, bool expect_match) {
1031 PCRE re(regexp, PCRE::UTF8);
1032 CHECK_EQ(re.error(), "");
1033 for (int i = 0; i < iters; i++) {
1034 if (anchor == Prog::kAnchored)
1035 CHECK_EQ(PCRE::FullMatch(text, re), expect_match);
1036 else
1037 CHECK_EQ(PCRE::PartialMatch(text, re), expect_match);
1038 }
1039 }
1040
SearchCachedRE2(int iters,const char * regexp,const StringPiece & text,Prog::Anchor anchor,bool expect_match)1041 void SearchCachedRE2(int iters, const char* regexp, const StringPiece& text,
1042 Prog::Anchor anchor, bool expect_match) {
1043 RE2 re(regexp);
1044 CHECK_EQ(re.error(), "");
1045 for (int i = 0; i < iters; i++) {
1046 if (anchor == Prog::kAnchored)
1047 CHECK_EQ(RE2::FullMatch(text, re), expect_match);
1048 else
1049 CHECK_EQ(RE2::PartialMatch(text, re), expect_match);
1050 }
1051 }
1052
1053
1054 // Runs implementation to full match regexp against text,
1055 // extracting three submatches. Expects match always.
1056
Parse3NFA(int iters,const char * regexp,const StringPiece & text)1057 void Parse3NFA(int iters, const char* regexp, const StringPiece& text) {
1058 for (int i = 0; i < iters; i++) {
1059 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1060 CHECK(re);
1061 Prog* prog = re->CompileToProg(0);
1062 CHECK(prog);
1063 StringPiece sp[4]; // 4 because sp[0] is whole match.
1064 CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1065 Prog::kFullMatch, sp, 4));
1066 delete prog;
1067 re->Decref();
1068 }
1069 }
1070
Parse3OnePass(int iters,const char * regexp,const StringPiece & text)1071 void Parse3OnePass(int iters, const char* regexp, const StringPiece& text) {
1072 for (int i = 0; i < iters; i++) {
1073 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1074 CHECK(re);
1075 Prog* prog = re->CompileToProg(0);
1076 CHECK(prog);
1077 CHECK(prog->IsOnePass());
1078 StringPiece sp[4]; // 4 because sp[0] is whole match.
1079 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1080 delete prog;
1081 re->Decref();
1082 }
1083 }
1084
Parse3BitState(int iters,const char * regexp,const StringPiece & text)1085 void Parse3BitState(int iters, const char* regexp, const StringPiece& text) {
1086 for (int i = 0; i < iters; i++) {
1087 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1088 CHECK(re);
1089 Prog* prog = re->CompileToProg(0);
1090 CHECK(prog);
1091 StringPiece sp[4]; // 4 because sp[0] is whole match.
1092 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1093 delete prog;
1094 re->Decref();
1095 }
1096 }
1097
Parse3Backtrack(int iters,const char * regexp,const StringPiece & text)1098 void Parse3Backtrack(int iters, const char* regexp, const StringPiece& text) {
1099 for (int i = 0; i < iters; i++) {
1100 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1101 CHECK(re);
1102 Prog* prog = re->CompileToProg(0);
1103 CHECK(prog);
1104 StringPiece sp[4]; // 4 because sp[0] is whole match.
1105 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1106 delete prog;
1107 re->Decref();
1108 }
1109 }
1110
Parse3PCRE(int iters,const char * regexp,const StringPiece & text)1111 void Parse3PCRE(int iters, const char* regexp, const StringPiece& text) {
1112 for (int i = 0; i < iters; i++) {
1113 PCRE re(regexp, PCRE::UTF8);
1114 CHECK_EQ(re.error(), "");
1115 StringPiece sp1, sp2, sp3;
1116 CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
1117 }
1118 }
1119
Parse3RE2(int iters,const char * regexp,const StringPiece & text)1120 void Parse3RE2(int iters, const char* regexp, const StringPiece& text) {
1121 for (int i = 0; i < iters; i++) {
1122 RE2 re(regexp);
1123 CHECK_EQ(re.error(), "");
1124 StringPiece sp1, sp2, sp3;
1125 CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
1126 }
1127 }
1128
Parse3CachedNFA(int iters,const char * regexp,const StringPiece & text)1129 void Parse3CachedNFA(int iters, const char* regexp, const StringPiece& text) {
1130 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1131 CHECK(re);
1132 Prog* prog = re->CompileToProg(0);
1133 CHECK(prog);
1134 StringPiece sp[4]; // 4 because sp[0] is whole match.
1135 for (int i = 0; i < iters; i++) {
1136 CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1137 Prog::kFullMatch, sp, 4));
1138 }
1139 delete prog;
1140 re->Decref();
1141 }
1142
Parse3CachedOnePass(int iters,const char * regexp,const StringPiece & text)1143 void Parse3CachedOnePass(int iters, const char* regexp, const StringPiece& text) {
1144 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1145 CHECK(re);
1146 Prog* prog = re->CompileToProg(0);
1147 CHECK(prog);
1148 CHECK(prog->IsOnePass());
1149 StringPiece sp[4]; // 4 because sp[0] is whole match.
1150 for (int i = 0; i < iters; i++)
1151 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1152 delete prog;
1153 re->Decref();
1154 }
1155
Parse3CachedBitState(int iters,const char * regexp,const StringPiece & text)1156 void Parse3CachedBitState(int iters, const char* regexp, const StringPiece& text) {
1157 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1158 CHECK(re);
1159 Prog* prog = re->CompileToProg(0);
1160 CHECK(prog);
1161 StringPiece sp[4]; // 4 because sp[0] is whole match.
1162 for (int i = 0; i < iters; i++)
1163 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1164 delete prog;
1165 re->Decref();
1166 }
1167
Parse3CachedBacktrack(int iters,const char * regexp,const StringPiece & text)1168 void Parse3CachedBacktrack(int iters, const char* regexp, const StringPiece& text) {
1169 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1170 CHECK(re);
1171 Prog* prog = re->CompileToProg(0);
1172 CHECK(prog);
1173 StringPiece sp[4]; // 4 because sp[0] is whole match.
1174 for (int i = 0; i < iters; i++)
1175 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 4));
1176 delete prog;
1177 re->Decref();
1178 }
1179
Parse3CachedPCRE(int iters,const char * regexp,const StringPiece & text)1180 void Parse3CachedPCRE(int iters, const char* regexp, const StringPiece& text) {
1181 PCRE re(regexp, PCRE::UTF8);
1182 CHECK_EQ(re.error(), "");
1183 StringPiece sp1, sp2, sp3;
1184 for (int i = 0; i < iters; i++) {
1185 CHECK(PCRE::FullMatch(text, re, &sp1, &sp2, &sp3));
1186 }
1187 }
1188
Parse3CachedRE2(int iters,const char * regexp,const StringPiece & text)1189 void Parse3CachedRE2(int iters, const char* regexp, const StringPiece& text) {
1190 RE2 re(regexp);
1191 CHECK_EQ(re.error(), "");
1192 StringPiece sp1, sp2, sp3;
1193 for (int i = 0; i < iters; i++) {
1194 CHECK(RE2::FullMatch(text, re, &sp1, &sp2, &sp3));
1195 }
1196 }
1197
1198
1199 // Runs implementation to full match regexp against text,
1200 // extracting three submatches. Expects match always.
1201
Parse1NFA(int iters,const char * regexp,const StringPiece & text)1202 void Parse1NFA(int iters, const char* regexp, const StringPiece& text) {
1203 for (int i = 0; i < iters; i++) {
1204 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1205 CHECK(re);
1206 Prog* prog = re->CompileToProg(0);
1207 CHECK(prog);
1208 StringPiece sp[2]; // 2 because sp[0] is whole match.
1209 CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1210 Prog::kFullMatch, sp, 2));
1211 delete prog;
1212 re->Decref();
1213 }
1214 }
1215
Parse1OnePass(int iters,const char * regexp,const StringPiece & text)1216 void Parse1OnePass(int iters, const char* regexp, const StringPiece& text) {
1217 for (int i = 0; i < iters; i++) {
1218 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1219 CHECK(re);
1220 Prog* prog = re->CompileToProg(0);
1221 CHECK(prog);
1222 CHECK(prog->IsOnePass());
1223 StringPiece sp[2]; // 2 because sp[0] is whole match.
1224 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1225 delete prog;
1226 re->Decref();
1227 }
1228 }
1229
Parse1BitState(int iters,const char * regexp,const StringPiece & text)1230 void Parse1BitState(int iters, const char* regexp, const StringPiece& text) {
1231 for (int i = 0; i < iters; i++) {
1232 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1233 CHECK(re);
1234 Prog* prog = re->CompileToProg(0);
1235 CHECK(prog);
1236 StringPiece sp[2]; // 2 because sp[0] is whole match.
1237 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1238 delete prog;
1239 re->Decref();
1240 }
1241 }
1242
Parse1PCRE(int iters,const char * regexp,const StringPiece & text)1243 void Parse1PCRE(int iters, const char* regexp, const StringPiece& text) {
1244 for (int i = 0; i < iters; i++) {
1245 PCRE re(regexp, PCRE::UTF8);
1246 CHECK_EQ(re.error(), "");
1247 StringPiece sp1;
1248 CHECK(PCRE::FullMatch(text, re, &sp1));
1249 }
1250 }
1251
Parse1RE2(int iters,const char * regexp,const StringPiece & text)1252 void Parse1RE2(int iters, const char* regexp, const StringPiece& text) {
1253 for (int i = 0; i < iters; i++) {
1254 RE2 re(regexp);
1255 CHECK_EQ(re.error(), "");
1256 StringPiece sp1;
1257 CHECK(RE2::FullMatch(text, re, &sp1));
1258 }
1259 }
1260
Parse1CachedNFA(int iters,const char * regexp,const StringPiece & text)1261 void Parse1CachedNFA(int iters, const char* regexp, const StringPiece& text) {
1262 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1263 CHECK(re);
1264 Prog* prog = re->CompileToProg(0);
1265 CHECK(prog);
1266 StringPiece sp[2]; // 2 because sp[0] is whole match.
1267 for (int i = 0; i < iters; i++) {
1268 CHECK(prog->SearchNFA(text, StringPiece(), Prog::kAnchored,
1269 Prog::kFullMatch, sp, 2));
1270 }
1271 delete prog;
1272 re->Decref();
1273 }
1274
Parse1CachedOnePass(int iters,const char * regexp,const StringPiece & text)1275 void Parse1CachedOnePass(int iters, const char* regexp, const StringPiece& text) {
1276 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1277 CHECK(re);
1278 Prog* prog = re->CompileToProg(0);
1279 CHECK(prog);
1280 CHECK(prog->IsOnePass());
1281 StringPiece sp[2]; // 2 because sp[0] is whole match.
1282 for (int i = 0; i < iters; i++)
1283 CHECK(prog->SearchOnePass(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1284 delete prog;
1285 re->Decref();
1286 }
1287
Parse1CachedBitState(int iters,const char * regexp,const StringPiece & text)1288 void Parse1CachedBitState(int iters, const char* regexp, const StringPiece& text) {
1289 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1290 CHECK(re);
1291 Prog* prog = re->CompileToProg(0);
1292 CHECK(prog);
1293 StringPiece sp[2]; // 2 because sp[0] is whole match.
1294 for (int i = 0; i < iters; i++)
1295 CHECK(prog->SearchBitState(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1296 delete prog;
1297 re->Decref();
1298 }
1299
Parse1CachedBacktrack(int iters,const char * regexp,const StringPiece & text)1300 void Parse1CachedBacktrack(int iters, const char* regexp, const StringPiece& text) {
1301 Regexp* re = Regexp::Parse(regexp, Regexp::LikePerl, NULL);
1302 CHECK(re);
1303 Prog* prog = re->CompileToProg(0);
1304 CHECK(prog);
1305 StringPiece sp[2]; // 2 because sp[0] is whole match.
1306 for (int i = 0; i < iters; i++)
1307 CHECK(prog->UnsafeSearchBacktrack(text, text, Prog::kAnchored, Prog::kFullMatch, sp, 2));
1308 delete prog;
1309 re->Decref();
1310 }
1311
Parse1CachedPCRE(int iters,const char * regexp,const StringPiece & text)1312 void Parse1CachedPCRE(int iters, const char* regexp, const StringPiece& text) {
1313 PCRE re(regexp, PCRE::UTF8);
1314 CHECK_EQ(re.error(), "");
1315 StringPiece sp1;
1316 for (int i = 0; i < iters; i++) {
1317 CHECK(PCRE::FullMatch(text, re, &sp1));
1318 }
1319 }
1320
Parse1CachedRE2(int iters,const char * regexp,const StringPiece & text)1321 void Parse1CachedRE2(int iters, const char* regexp, const StringPiece& text) {
1322 RE2 re(regexp);
1323 CHECK_EQ(re.error(), "");
1324 StringPiece sp1;
1325 for (int i = 0; i < iters; i++) {
1326 CHECK(RE2::FullMatch(text, re, &sp1));
1327 }
1328 }
1329
SearchParse2CachedPCRE(int iters,const char * regexp,const StringPiece & text)1330 void SearchParse2CachedPCRE(int iters, const char* regexp,
1331 const StringPiece& text) {
1332 PCRE re(regexp, PCRE::UTF8);
1333 CHECK_EQ(re.error(), "");
1334 for (int i = 0; i < iters; i++) {
1335 StringPiece sp1, sp2;
1336 CHECK(PCRE::PartialMatch(text, re, &sp1, &sp2));
1337 }
1338 }
1339
SearchParse2CachedRE2(int iters,const char * regexp,const StringPiece & text)1340 void SearchParse2CachedRE2(int iters, const char* regexp,
1341 const StringPiece& text) {
1342 RE2 re(regexp);
1343 CHECK_EQ(re.error(), "");
1344 for (int i = 0; i < iters; i++) {
1345 StringPiece sp1, sp2;
1346 CHECK(RE2::PartialMatch(text, re, &sp1, &sp2));
1347 }
1348 }
1349
SearchParse1CachedPCRE(int iters,const char * regexp,const StringPiece & text)1350 void SearchParse1CachedPCRE(int iters, const char* regexp,
1351 const StringPiece& text) {
1352 PCRE re(regexp, PCRE::UTF8);
1353 CHECK_EQ(re.error(), "");
1354 for (int i = 0; i < iters; i++) {
1355 StringPiece sp1;
1356 CHECK(PCRE::PartialMatch(text, re, &sp1));
1357 }
1358 }
1359
SearchParse1CachedRE2(int iters,const char * regexp,const StringPiece & text)1360 void SearchParse1CachedRE2(int iters, const char* regexp,
1361 const StringPiece& text) {
1362 RE2 re(regexp);
1363 CHECK_EQ(re.error(), "");
1364 for (int i = 0; i < iters; i++) {
1365 StringPiece sp1;
1366 CHECK(RE2::PartialMatch(text, re, &sp1));
1367 }
1368 }
1369
EmptyPartialMatchPCRE(int n)1370 void EmptyPartialMatchPCRE(int n) {
1371 PCRE re("");
1372 for (int i = 0; i < n; i++) {
1373 PCRE::PartialMatch("", re);
1374 }
1375 }
1376
EmptyPartialMatchRE2(int n)1377 void EmptyPartialMatchRE2(int n) {
1378 RE2 re("");
1379 for (int i = 0; i < n; i++) {
1380 RE2::PartialMatch("", re);
1381 }
1382 }
1383 #ifdef USEPCRE
1384 BENCHMARK(EmptyPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1385 #endif
1386 BENCHMARK(EmptyPartialMatchRE2)->ThreadRange(1, NumCPUs());
1387
SimplePartialMatchPCRE(int n)1388 void SimplePartialMatchPCRE(int n) {
1389 PCRE re("abcdefg");
1390 for (int i = 0; i < n; i++) {
1391 PCRE::PartialMatch("abcdefg", re);
1392 }
1393 }
1394
SimplePartialMatchRE2(int n)1395 void SimplePartialMatchRE2(int n) {
1396 RE2 re("abcdefg");
1397 for (int i = 0; i < n; i++) {
1398 RE2::PartialMatch("abcdefg", re);
1399 }
1400 }
1401 #ifdef USEPCRE
1402 BENCHMARK(SimplePartialMatchPCRE)->ThreadRange(1, NumCPUs());
1403 #endif
1404 BENCHMARK(SimplePartialMatchRE2)->ThreadRange(1, NumCPUs());
1405
1406 static string http_text =
1407 "GET /asdfhjasdhfasdlfhasdflkjasdfkljasdhflaskdjhf"
1408 "alksdjfhasdlkfhasdlkjfhasdljkfhadsjklf HTTP/1.1";
1409
HTTPPartialMatchPCRE(int n)1410 void HTTPPartialMatchPCRE(int n) {
1411 StringPiece a;
1412 PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1413 for (int i = 0; i < n; i++) {
1414 PCRE::PartialMatch(http_text, re, &a);
1415 }
1416 }
1417
HTTPPartialMatchRE2(int n)1418 void HTTPPartialMatchRE2(int n) {
1419 StringPiece a;
1420 RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1421 for (int i = 0; i < n; i++) {
1422 RE2::PartialMatch(http_text, re, &a);
1423 }
1424 }
1425
1426 #ifdef USEPCRE
1427 BENCHMARK(HTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1428 #endif
1429 BENCHMARK(HTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
1430
1431 static string smallhttp_text =
1432 "GET /abc HTTP/1.1";
1433
SmallHTTPPartialMatchPCRE(int n)1434 void SmallHTTPPartialMatchPCRE(int n) {
1435 StringPiece a;
1436 PCRE re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1437 for (int i = 0; i < n; i++) {
1438 PCRE::PartialMatch(smallhttp_text, re, &a);
1439 }
1440 }
1441
SmallHTTPPartialMatchRE2(int n)1442 void SmallHTTPPartialMatchRE2(int n) {
1443 StringPiece a;
1444 RE2 re("(?-s)^(?:GET|POST) +([^ ]+) HTTP");
1445 for (int i = 0; i < n; i++) {
1446 RE2::PartialMatch(smallhttp_text, re, &a);
1447 }
1448 }
1449
1450 #ifdef USEPCRE
1451 BENCHMARK(SmallHTTPPartialMatchPCRE)->ThreadRange(1, NumCPUs());
1452 #endif
1453 BENCHMARK(SmallHTTPPartialMatchRE2)->ThreadRange(1, NumCPUs());
1454
DotMatchPCRE(int n)1455 void DotMatchPCRE(int n) {
1456 StringPiece a;
1457 PCRE re("(?-s)^(.+)");
1458 for (int i = 0; i < n; i++) {
1459 PCRE::PartialMatch(http_text, re, &a);
1460 }
1461 }
1462
DotMatchRE2(int n)1463 void DotMatchRE2(int n) {
1464 StringPiece a;
1465 RE2 re("(?-s)^(.+)");
1466 for (int i = 0; i < n; i++) {
1467 RE2::PartialMatch(http_text, re, &a);
1468 }
1469 }
1470
1471 #ifdef USEPCRE
1472 BENCHMARK(DotMatchPCRE)->ThreadRange(1, NumCPUs());
1473 #endif
1474 BENCHMARK(DotMatchRE2)->ThreadRange(1, NumCPUs());
1475
ASCIIMatchPCRE(int n)1476 void ASCIIMatchPCRE(int n) {
1477 StringPiece a;
1478 PCRE re("(?-s)^([ -~]+)");
1479 for (int i = 0; i < n; i++) {
1480 PCRE::PartialMatch(http_text, re, &a);
1481 }
1482 }
1483
ASCIIMatchRE2(int n)1484 void ASCIIMatchRE2(int n) {
1485 StringPiece a;
1486 RE2 re("(?-s)^([ -~]+)");
1487 for (int i = 0; i < n; i++) {
1488 RE2::PartialMatch(http_text, re, &a);
1489 }
1490 }
1491
1492 #ifdef USEPCRE
1493 BENCHMARK(ASCIIMatchPCRE)->ThreadRange(1, NumCPUs());
1494 #endif
1495 BENCHMARK(ASCIIMatchRE2)->ThreadRange(1, NumCPUs());
1496
FullMatchPCRE(int iter,int n,const char * regexp)1497 void FullMatchPCRE(int iter, int n, const char *regexp) {
1498 StopBenchmarkTiming();
1499 string s;
1500 MakeText(&s, n);
1501 s += "ABCDEFGHIJ";
1502 BenchmarkMemoryUsage();
1503 PCRE re(regexp);
1504 StartBenchmarkTiming();
1505 for (int i = 0; i < iter; i++)
1506 CHECK(PCRE::FullMatch(s, re));
1507 SetBenchmarkBytesProcessed(static_cast<int64_t>(iter)*n);
1508 }
1509
FullMatchRE2(int iter,int n,const char * regexp)1510 void FullMatchRE2(int iter, int n, const char *regexp) {
1511 StopBenchmarkTiming();
1512 string s;
1513 MakeText(&s, n);
1514 s += "ABCDEFGHIJ";
1515 BenchmarkMemoryUsage();
1516 RE2 re(regexp, RE2::Latin1);
1517 StartBenchmarkTiming();
1518 for (int i = 0; i < iter; i++)
1519 CHECK(RE2::FullMatch(s, re));
1520 SetBenchmarkBytesProcessed(static_cast<int64_t>(iter)*n);
1521 }
1522
FullMatch_DotStar_CachedPCRE(int i,int n)1523 void FullMatch_DotStar_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s).*"); }
FullMatch_DotStar_CachedRE2(int i,int n)1524 void FullMatch_DotStar_CachedRE2(int i, int n) { FullMatchRE2(i, n, "(?s).*"); }
1525
FullMatch_DotStarDollar_CachedPCRE(int i,int n)1526 void FullMatch_DotStarDollar_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s).*$"); }
FullMatch_DotStarDollar_CachedRE2(int i,int n)1527 void FullMatch_DotStarDollar_CachedRE2(int i, int n) { FullMatchRE2(i, n, "(?s).*$"); }
1528
FullMatch_DotStarCapture_CachedPCRE(int i,int n)1529 void FullMatch_DotStarCapture_CachedPCRE(int i, int n) { FullMatchPCRE(i, n, "(?s)((.*)()()($))"); }
FullMatch_DotStarCapture_CachedRE2(int i,int n)1530 void FullMatch_DotStarCapture_CachedRE2(int i, int n) { FullMatchRE2(i, n, "(?s)((.*)()()($))"); }
1531
1532 #ifdef USEPCRE
1533 BENCHMARK_RANGE(FullMatch_DotStar_CachedPCRE, 8, 2<<20);
1534 #endif
1535 BENCHMARK_RANGE(FullMatch_DotStar_CachedRE2, 8, 2<<20);
1536
1537 #ifdef USEPCRE
1538 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedPCRE, 8, 2<<20);
1539 #endif
1540 BENCHMARK_RANGE(FullMatch_DotStarDollar_CachedRE2, 8, 2<<20);
1541
1542 #ifdef USEPCRE
1543 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedPCRE, 8, 2<<20);
1544 #endif
1545 BENCHMARK_RANGE(FullMatch_DotStarCapture_CachedRE2, 8, 2<<20);
1546
PossibleMatchRangeCommon(int iter,const char * regexp)1547 void PossibleMatchRangeCommon(int iter, const char* regexp) {
1548 StopBenchmarkTiming();
1549 RE2 re(regexp);
1550 StartBenchmarkTiming();
1551 string min;
1552 string max;
1553 const int kMaxLen = 16;
1554 for (int i = 0; i < iter; i++) {
1555 CHECK(re.PossibleMatchRange(&min, &max, kMaxLen));
1556 }
1557 }
1558
PossibleMatchRange_Trivial(int i)1559 void PossibleMatchRange_Trivial(int i) {
1560 PossibleMatchRangeCommon(i, ".*");
1561 }
PossibleMatchRange_Complex(int i)1562 void PossibleMatchRange_Complex(int i) {
1563 PossibleMatchRangeCommon(i, "^abc[def]?[gh]{1,2}.*");
1564 }
PossibleMatchRange_Prefix(int i)1565 void PossibleMatchRange_Prefix(int i) {
1566 PossibleMatchRangeCommon(i, "^some_random_prefix.*");
1567 }
PossibleMatchRange_NoProg(int i)1568 void PossibleMatchRange_NoProg(int i) {
1569 PossibleMatchRangeCommon(i, "^some_random_string$");
1570 }
1571
1572 BENCHMARK(PossibleMatchRange_Trivial);
1573 BENCHMARK(PossibleMatchRange_Complex);
1574 BENCHMARK(PossibleMatchRange_Prefix);
1575 BENCHMARK(PossibleMatchRange_NoProg);
1576
1577 } // namespace re2
1578