// Copyright 2008 The RE2 Authors. All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Random testing of regular expression matching.
6*ccdc9c3eSSadaf Ebrahimi
7*ccdc9c3eSSadaf Ebrahimi #include <stdio.h>
8*ccdc9c3eSSadaf Ebrahimi #include <string>
9*ccdc9c3eSSadaf Ebrahimi #include <vector>
10*ccdc9c3eSSadaf Ebrahimi
11*ccdc9c3eSSadaf Ebrahimi #include "util/test.h"
12*ccdc9c3eSSadaf Ebrahimi #include "re2/testing/exhaustive_tester.h"
13*ccdc9c3eSSadaf Ebrahimi
14*ccdc9c3eSSadaf Ebrahimi DEFINE_int32(regexpseed, 404, "Random regexp seed.");
15*ccdc9c3eSSadaf Ebrahimi DEFINE_int32(regexpcount, 100, "How many random regexps to generate.");
16*ccdc9c3eSSadaf Ebrahimi DEFINE_int32(stringseed, 200, "Random string seed.");
17*ccdc9c3eSSadaf Ebrahimi DEFINE_int32(stringcount, 100, "How many random strings to generate.");
18*ccdc9c3eSSadaf Ebrahimi
19*ccdc9c3eSSadaf Ebrahimi namespace re2 {
20*ccdc9c3eSSadaf Ebrahimi
21*ccdc9c3eSSadaf Ebrahimi // Runs a random test on the given parameters.
22*ccdc9c3eSSadaf Ebrahimi // (Always uses the same random seeds for reproducibility.
23*ccdc9c3eSSadaf Ebrahimi // Can give different seeds on command line.)
RandomTest(int maxatoms,int maxops,const std::vector<string> & alphabet,const std::vector<string> & ops,int maxstrlen,const std::vector<string> & stralphabet,const string & wrapper)24*ccdc9c3eSSadaf Ebrahimi static void RandomTest(int maxatoms, int maxops,
25*ccdc9c3eSSadaf Ebrahimi const std::vector<string>& alphabet,
26*ccdc9c3eSSadaf Ebrahimi const std::vector<string>& ops,
27*ccdc9c3eSSadaf Ebrahimi int maxstrlen,
28*ccdc9c3eSSadaf Ebrahimi const std::vector<string>& stralphabet,
29*ccdc9c3eSSadaf Ebrahimi const string& wrapper) {
30*ccdc9c3eSSadaf Ebrahimi // Limit to smaller test cases in debug mode,
31*ccdc9c3eSSadaf Ebrahimi // because everything is so much slower.
32*ccdc9c3eSSadaf Ebrahimi if (RE2_DEBUG_MODE) {
33*ccdc9c3eSSadaf Ebrahimi maxatoms--;
34*ccdc9c3eSSadaf Ebrahimi maxops--;
35*ccdc9c3eSSadaf Ebrahimi maxstrlen /= 2;
36*ccdc9c3eSSadaf Ebrahimi }
37*ccdc9c3eSSadaf Ebrahimi
38*ccdc9c3eSSadaf Ebrahimi ExhaustiveTester t(maxatoms, maxops, alphabet, ops,
39*ccdc9c3eSSadaf Ebrahimi maxstrlen, stralphabet, wrapper, "");
40*ccdc9c3eSSadaf Ebrahimi t.RandomStrings(FLAGS_stringseed, FLAGS_stringcount);
41*ccdc9c3eSSadaf Ebrahimi t.GenerateRandom(FLAGS_regexpseed, FLAGS_regexpcount);
42*ccdc9c3eSSadaf Ebrahimi printf("%d regexps, %d tests, %d failures [%d/%d str]\n",
43*ccdc9c3eSSadaf Ebrahimi t.regexps(), t.tests(), t.failures(), maxstrlen, (int)stralphabet.size());
44*ccdc9c3eSSadaf Ebrahimi EXPECT_EQ(0, t.failures());
45*ccdc9c3eSSadaf Ebrahimi }
46*ccdc9c3eSSadaf Ebrahimi
47*ccdc9c3eSSadaf Ebrahimi // Tests random small regexps involving literals and egrep operators.
TEST(Random,SmallEgrepLiterals)48*ccdc9c3eSSadaf Ebrahimi TEST(Random, SmallEgrepLiterals) {
49*ccdc9c3eSSadaf Ebrahimi RandomTest(5, 5, Explode("abc."), RegexpGenerator::EgrepOps(),
50*ccdc9c3eSSadaf Ebrahimi 15, Explode("abc"),
51*ccdc9c3eSSadaf Ebrahimi "");
52*ccdc9c3eSSadaf Ebrahimi }
53*ccdc9c3eSSadaf Ebrahimi
54*ccdc9c3eSSadaf Ebrahimi // Tests random bigger regexps involving literals and egrep operators.
TEST(Random,BigEgrepLiterals)55*ccdc9c3eSSadaf Ebrahimi TEST(Random, BigEgrepLiterals) {
56*ccdc9c3eSSadaf Ebrahimi RandomTest(10, 10, Explode("abc."), RegexpGenerator::EgrepOps(),
57*ccdc9c3eSSadaf Ebrahimi 15, Explode("abc"),
58*ccdc9c3eSSadaf Ebrahimi "");
59*ccdc9c3eSSadaf Ebrahimi }
60*ccdc9c3eSSadaf Ebrahimi
61*ccdc9c3eSSadaf Ebrahimi // Tests random small regexps involving literals, capturing parens,
62*ccdc9c3eSSadaf Ebrahimi // and egrep operators.
TEST(Random,SmallEgrepCaptures)63*ccdc9c3eSSadaf Ebrahimi TEST(Random, SmallEgrepCaptures) {
64*ccdc9c3eSSadaf Ebrahimi RandomTest(5, 5, Split(" ", "a (b) ."), RegexpGenerator::EgrepOps(),
65*ccdc9c3eSSadaf Ebrahimi 15, Explode("abc"),
66*ccdc9c3eSSadaf Ebrahimi "");
67*ccdc9c3eSSadaf Ebrahimi }
68*ccdc9c3eSSadaf Ebrahimi
69*ccdc9c3eSSadaf Ebrahimi // Tests random bigger regexps involving literals, capturing parens,
70*ccdc9c3eSSadaf Ebrahimi // and egrep operators.
TEST(Random,BigEgrepCaptures)71*ccdc9c3eSSadaf Ebrahimi TEST(Random, BigEgrepCaptures) {
72*ccdc9c3eSSadaf Ebrahimi RandomTest(10, 10, Split(" ", "a (b) ."), RegexpGenerator::EgrepOps(),
73*ccdc9c3eSSadaf Ebrahimi 15, Explode("abc"),
74*ccdc9c3eSSadaf Ebrahimi "");
75*ccdc9c3eSSadaf Ebrahimi }
76*ccdc9c3eSSadaf Ebrahimi
77*ccdc9c3eSSadaf Ebrahimi // Tests random large complicated expressions, using all the possible
78*ccdc9c3eSSadaf Ebrahimi // operators, some literals, some parenthesized literals, and predefined
79*ccdc9c3eSSadaf Ebrahimi // character classes like \d. (Adding larger character classes would
80*ccdc9c3eSSadaf Ebrahimi // make for too many possibilities.)
TEST(Random,Complicated)81*ccdc9c3eSSadaf Ebrahimi TEST(Random, Complicated) {
82*ccdc9c3eSSadaf Ebrahimi std::vector<string> ops = Split(" ",
83*ccdc9c3eSSadaf Ebrahimi "%s%s %s|%s %s* %s*? %s+ %s+? %s? %s?? "
84*ccdc9c3eSSadaf Ebrahimi "%s{0} %s{0,} %s{1} %s{1,} %s{0,1} %s{0,2} %s{1,2} "
85*ccdc9c3eSSadaf Ebrahimi "%s{2} %s{2,} %s{3,4} %s{4,5}");
86*ccdc9c3eSSadaf Ebrahimi
87*ccdc9c3eSSadaf Ebrahimi // Use (?:\b) and (?:\B) instead of \b and \B,
88*ccdc9c3eSSadaf Ebrahimi // because PCRE rejects \b* but accepts (?:\b)*.
89*ccdc9c3eSSadaf Ebrahimi // Ditto ^ and $.
90*ccdc9c3eSSadaf Ebrahimi std::vector<string> atoms = Split(" ",
91*ccdc9c3eSSadaf Ebrahimi ". (?:^) (?:$) \\a \\f \\n \\r \\t \\v "
92*ccdc9c3eSSadaf Ebrahimi "\\d \\D \\s \\S \\w \\W (?:\\b) (?:\\B) "
93*ccdc9c3eSSadaf Ebrahimi "a (a) b c - \\\\");
94*ccdc9c3eSSadaf Ebrahimi std::vector<string> alphabet = Explode("abc123\001\002\003\t\r\n\v\f\a");
95*ccdc9c3eSSadaf Ebrahimi RandomTest(10, 10, atoms, ops, 20, alphabet, "");
96*ccdc9c3eSSadaf Ebrahimi }
97*ccdc9c3eSSadaf Ebrahimi
98*ccdc9c3eSSadaf Ebrahimi } // namespace re2
99*ccdc9c3eSSadaf Ebrahimi
100