// Copyright 2016 The RE2 Authors. All Rights Reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. #include #include #include #include #include #include #include "re2/prefilter.h" #include "re2/re2.h" using re2::StringPiece; using std::string; // NOT static, NOT signed. uint8_t dummy = 0; void Test(StringPiece pattern, const RE2::Options& options, StringPiece text) { RE2 re(pattern, options); if (!re.ok()) return; // Don't waste time fuzzing high-size programs. // They can cause bug reports due to fuzzer timeouts. int size = re.ProgramSize(); if (size > 9999) return; int rsize = re.ReverseProgramSize(); if (rsize > 9999) return; // Don't waste time fuzzing high-fanout programs. // They can cause bug reports due to fuzzer timeouts. std::map histogram; int fanout = re.ProgramFanout(&histogram); if (fanout > 9) return; int rfanout = re.ReverseProgramFanout(&histogram); if (rfanout > 9) return; // Don't waste time fuzzing programs with large substrings. // They can cause bug reports due to fuzzer timeouts when they // are repetitions (e.g. hundreds of NUL bytes) and matching is // unanchored. And they aren't interesting for fuzzing purposes. std::unique_ptr prefilter(re2::Prefilter::FromRE2(&re)); if (prefilter == nullptr) return; std::queue nodes; nodes.push(prefilter.get()); while (!nodes.empty()) { re2::Prefilter* node = nodes.front(); nodes.pop(); if (node->op() == re2::Prefilter::ATOM) { if (node->atom().size() > 9) return; } else if (node->op() == re2::Prefilter::AND || node->op() == re2::Prefilter::OR) { for (re2::Prefilter* sub : *node->subs()) nodes.push(sub); } } if (re.NumberOfCapturingGroups() == 0) { // Avoid early return due to too many arguments. StringPiece sp = text; RE2::FullMatch(sp, re); RE2::PartialMatch(sp, re); RE2::Consume(&sp, re); sp = text; // Reset. RE2::FindAndConsume(&sp, re); } else { // Okay, we have at least one capturing group... // Try conversion for variously typed arguments. StringPiece sp = text; short s; RE2::FullMatch(sp, re, &s); long l; RE2::PartialMatch(sp, re, &l); float f; RE2::Consume(&sp, re, &f); sp = text; // Reset. double d; RE2::FindAndConsume(&sp, re, &d); } string s = string(text); RE2::Replace(&s, re, ""); s = string(text); // Reset. RE2::GlobalReplace(&s, re, ""); string min, max; re.PossibleMatchRange(&min, &max, /*maxlen=*/9); // Exercise some other API functionality. dummy += re.NamedCapturingGroups().size(); dummy += re.CapturingGroupNames().size(); dummy += RE2::QuoteMeta(pattern).size(); } // Entry point for libFuzzer. extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size) { if (size == 0 || size > 999) return 0; // Crudely limit the use of ., \p, \P, \d, \D, \s, \S, \w and \W. // Otherwise, we will waste time on inputs that have long runs of various // character classes. The fuzzer has shown itself to be easily capable of // generating such patterns that fall within the other limits, but result // in timeouts nonetheless. The marginal cost is high - even more so when // counted repetition is involved - whereas the marginal benefit is zero. // TODO(junyer): Handle [:isalnum:] et al. when they start to cause pain. int cc = 0; for (size_t i = 0; i < size; i++) { if (data[i] == '.') cc++; if (data[i] != '\\') continue; i++; if (i >= size) break; if (data[i] == 'p' || data[i] == 'P' || data[i] == 'd' || data[i] == 'D' || data[i] == 's' || data[i] == 'S' || data[i] == 'w' || data[i] == 'W') cc++; } if (cc > 9) return 0; // The one-at-a-time hash by Bob Jenkins. uint32_t hash = 0; for (size_t i = 0; i < size; i++) { hash += data[i]; hash += (hash << 10); hash ^= (hash >> 6); } hash += (hash << 3); hash ^= (hash >> 11); hash += (hash << 15); RE2::Options options; options.set_log_errors(false); options.set_max_mem(64 << 20); options.set_encoding(hash & 1 ? RE2::Options::EncodingLatin1 : RE2::Options::EncodingUTF8); options.set_posix_syntax(hash & 2); options.set_longest_match(hash & 4); options.set_literal(hash & 8); options.set_never_nl(hash & 16); options.set_dot_nl(hash & 32); options.set_never_capture(hash & 64); options.set_case_sensitive(hash & 128); options.set_perl_classes(hash & 256); options.set_word_boundary(hash & 512); options.set_one_line(hash & 1024); const char* ptr = reinterpret_cast(data); int len = static_cast(size); StringPiece pattern(ptr, len); StringPiece text(ptr, len); Test(pattern, options, text); return 0; }