xref: /aosp_15_r20/external/regex-re2/re2/set.cc (revision ccdc9c3e24c519bfa4832a66aa2e83a52c19f295)
1*ccdc9c3eSSadaf Ebrahimi // Copyright 2010 The RE2 Authors.  All Rights Reserved.
2*ccdc9c3eSSadaf Ebrahimi // Use of this source code is governed by a BSD-style
3*ccdc9c3eSSadaf Ebrahimi // license that can be found in the LICENSE file.
4*ccdc9c3eSSadaf Ebrahimi 
5*ccdc9c3eSSadaf Ebrahimi #include "re2/set.h"
6*ccdc9c3eSSadaf Ebrahimi 
7*ccdc9c3eSSadaf Ebrahimi #include <stddef.h>
8*ccdc9c3eSSadaf Ebrahimi #include <algorithm>
9*ccdc9c3eSSadaf Ebrahimi #include <memory>
10*ccdc9c3eSSadaf Ebrahimi 
11*ccdc9c3eSSadaf Ebrahimi #include "util/util.h"
12*ccdc9c3eSSadaf Ebrahimi #include "util/logging.h"
13*ccdc9c3eSSadaf Ebrahimi #include "util/pod_array.h"
14*ccdc9c3eSSadaf Ebrahimi #include "re2/stringpiece.h"
15*ccdc9c3eSSadaf Ebrahimi #include "re2/prog.h"
16*ccdc9c3eSSadaf Ebrahimi #include "re2/re2.h"
17*ccdc9c3eSSadaf Ebrahimi #include "re2/regexp.h"
18*ccdc9c3eSSadaf Ebrahimi 
19*ccdc9c3eSSadaf Ebrahimi namespace re2 {
20*ccdc9c3eSSadaf Ebrahimi 
Set(const RE2::Options & options,RE2::Anchor anchor)21*ccdc9c3eSSadaf Ebrahimi RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor) {
22*ccdc9c3eSSadaf Ebrahimi   options_.Copy(options);
23*ccdc9c3eSSadaf Ebrahimi   options_.set_never_capture(true);  // might unblock some optimisations
24*ccdc9c3eSSadaf Ebrahimi   anchor_ = anchor;
25*ccdc9c3eSSadaf Ebrahimi   prog_ = NULL;
26*ccdc9c3eSSadaf Ebrahimi   compiled_ = false;
27*ccdc9c3eSSadaf Ebrahimi   size_ = 0;
28*ccdc9c3eSSadaf Ebrahimi }
29*ccdc9c3eSSadaf Ebrahimi 
~Set()30*ccdc9c3eSSadaf Ebrahimi RE2::Set::~Set() {
31*ccdc9c3eSSadaf Ebrahimi   for (size_t i = 0; i < elem_.size(); i++)
32*ccdc9c3eSSadaf Ebrahimi     elem_[i].second->Decref();
33*ccdc9c3eSSadaf Ebrahimi   delete prog_;
34*ccdc9c3eSSadaf Ebrahimi }
35*ccdc9c3eSSadaf Ebrahimi 
Add(const StringPiece & pattern,string * error)36*ccdc9c3eSSadaf Ebrahimi int RE2::Set::Add(const StringPiece& pattern, string* error) {
37*ccdc9c3eSSadaf Ebrahimi   if (compiled_) {
38*ccdc9c3eSSadaf Ebrahimi     LOG(DFATAL) << "RE2::Set::Add() called after compiling";
39*ccdc9c3eSSadaf Ebrahimi     return -1;
40*ccdc9c3eSSadaf Ebrahimi   }
41*ccdc9c3eSSadaf Ebrahimi 
42*ccdc9c3eSSadaf Ebrahimi   Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
43*ccdc9c3eSSadaf Ebrahimi     options_.ParseFlags());
44*ccdc9c3eSSadaf Ebrahimi   RegexpStatus status;
45*ccdc9c3eSSadaf Ebrahimi   re2::Regexp* re = Regexp::Parse(pattern, pf, &status);
46*ccdc9c3eSSadaf Ebrahimi   if (re == NULL) {
47*ccdc9c3eSSadaf Ebrahimi     if (error != NULL)
48*ccdc9c3eSSadaf Ebrahimi       *error = status.Text();
49*ccdc9c3eSSadaf Ebrahimi     if (options_.log_errors())
50*ccdc9c3eSSadaf Ebrahimi       LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text();
51*ccdc9c3eSSadaf Ebrahimi     return -1;
52*ccdc9c3eSSadaf Ebrahimi   }
53*ccdc9c3eSSadaf Ebrahimi 
54*ccdc9c3eSSadaf Ebrahimi   // Concatenate with match index and push on vector.
55*ccdc9c3eSSadaf Ebrahimi   int n = static_cast<int>(elem_.size());
56*ccdc9c3eSSadaf Ebrahimi   re2::Regexp* m = re2::Regexp::HaveMatch(n, pf);
57*ccdc9c3eSSadaf Ebrahimi   if (re->op() == kRegexpConcat) {
58*ccdc9c3eSSadaf Ebrahimi     int nsub = re->nsub();
59*ccdc9c3eSSadaf Ebrahimi     PODArray<re2::Regexp*> sub(nsub + 1);
60*ccdc9c3eSSadaf Ebrahimi     for (int i = 0; i < nsub; i++)
61*ccdc9c3eSSadaf Ebrahimi       sub[i] = re->sub()[i]->Incref();
62*ccdc9c3eSSadaf Ebrahimi     sub[nsub] = m;
63*ccdc9c3eSSadaf Ebrahimi     re->Decref();
64*ccdc9c3eSSadaf Ebrahimi     re = re2::Regexp::Concat(sub.data(), nsub + 1, pf);
65*ccdc9c3eSSadaf Ebrahimi   } else {
66*ccdc9c3eSSadaf Ebrahimi     re2::Regexp* sub[2];
67*ccdc9c3eSSadaf Ebrahimi     sub[0] = re;
68*ccdc9c3eSSadaf Ebrahimi     sub[1] = m;
69*ccdc9c3eSSadaf Ebrahimi     re = re2::Regexp::Concat(sub, 2, pf);
70*ccdc9c3eSSadaf Ebrahimi   }
71*ccdc9c3eSSadaf Ebrahimi   elem_.emplace_back(string(pattern), re);
72*ccdc9c3eSSadaf Ebrahimi   return n;
73*ccdc9c3eSSadaf Ebrahimi }
74*ccdc9c3eSSadaf Ebrahimi 
Compile()75*ccdc9c3eSSadaf Ebrahimi bool RE2::Set::Compile() {
76*ccdc9c3eSSadaf Ebrahimi   if (compiled_) {
77*ccdc9c3eSSadaf Ebrahimi     LOG(DFATAL) << "RE2::Set::Compile() called more than once";
78*ccdc9c3eSSadaf Ebrahimi     return false;
79*ccdc9c3eSSadaf Ebrahimi   }
80*ccdc9c3eSSadaf Ebrahimi   compiled_ = true;
81*ccdc9c3eSSadaf Ebrahimi   size_ = static_cast<int>(elem_.size());
82*ccdc9c3eSSadaf Ebrahimi 
83*ccdc9c3eSSadaf Ebrahimi   // Sort the elements by their patterns. This is good enough for now
84*ccdc9c3eSSadaf Ebrahimi   // until we have a Regexp comparison function. (Maybe someday...)
85*ccdc9c3eSSadaf Ebrahimi   std::sort(elem_.begin(), elem_.end(),
86*ccdc9c3eSSadaf Ebrahimi             [](const Elem& a, const Elem& b) -> bool {
87*ccdc9c3eSSadaf Ebrahimi               return a.first < b.first;
88*ccdc9c3eSSadaf Ebrahimi             });
89*ccdc9c3eSSadaf Ebrahimi 
90*ccdc9c3eSSadaf Ebrahimi   PODArray<re2::Regexp*> sub(size_);
91*ccdc9c3eSSadaf Ebrahimi   for (int i = 0; i < size_; i++)
92*ccdc9c3eSSadaf Ebrahimi     sub[i] = elem_[i].second;
93*ccdc9c3eSSadaf Ebrahimi   elem_.clear();
94*ccdc9c3eSSadaf Ebrahimi   elem_.shrink_to_fit();
95*ccdc9c3eSSadaf Ebrahimi 
96*ccdc9c3eSSadaf Ebrahimi   Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
97*ccdc9c3eSSadaf Ebrahimi     options_.ParseFlags());
98*ccdc9c3eSSadaf Ebrahimi   re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf);
99*ccdc9c3eSSadaf Ebrahimi 
100*ccdc9c3eSSadaf Ebrahimi   prog_ = Prog::CompileSet(re, anchor_, options_.max_mem());
101*ccdc9c3eSSadaf Ebrahimi   re->Decref();
102*ccdc9c3eSSadaf Ebrahimi   return prog_ != NULL;
103*ccdc9c3eSSadaf Ebrahimi }
104*ccdc9c3eSSadaf Ebrahimi 
Match(const StringPiece & text,std::vector<int> * v) const105*ccdc9c3eSSadaf Ebrahimi bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v) const {
106*ccdc9c3eSSadaf Ebrahimi   return Match(text, v, NULL);
107*ccdc9c3eSSadaf Ebrahimi }
108*ccdc9c3eSSadaf Ebrahimi 
Match(const StringPiece & text,std::vector<int> * v,ErrorInfo * error_info) const109*ccdc9c3eSSadaf Ebrahimi bool RE2::Set::Match(const StringPiece& text, std::vector<int>* v,
110*ccdc9c3eSSadaf Ebrahimi                      ErrorInfo* error_info) const {
111*ccdc9c3eSSadaf Ebrahimi   if (!compiled_) {
112*ccdc9c3eSSadaf Ebrahimi     LOG(DFATAL) << "RE2::Set::Match() called before compiling";
113*ccdc9c3eSSadaf Ebrahimi     if (error_info != NULL)
114*ccdc9c3eSSadaf Ebrahimi       error_info->kind = kNotCompiled;
115*ccdc9c3eSSadaf Ebrahimi     return false;
116*ccdc9c3eSSadaf Ebrahimi   }
117*ccdc9c3eSSadaf Ebrahimi   bool dfa_failed = false;
118*ccdc9c3eSSadaf Ebrahimi   std::unique_ptr<SparseSet> matches;
119*ccdc9c3eSSadaf Ebrahimi   if (v != NULL) {
120*ccdc9c3eSSadaf Ebrahimi     matches.reset(new SparseSet(size_));
121*ccdc9c3eSSadaf Ebrahimi     v->clear();
122*ccdc9c3eSSadaf Ebrahimi   }
123*ccdc9c3eSSadaf Ebrahimi   bool ret = prog_->SearchDFA(text, text, Prog::kAnchored, Prog::kManyMatch,
124*ccdc9c3eSSadaf Ebrahimi                               NULL, &dfa_failed, matches.get());
125*ccdc9c3eSSadaf Ebrahimi   if (dfa_failed) {
126*ccdc9c3eSSadaf Ebrahimi     if (options_.log_errors())
127*ccdc9c3eSSadaf Ebrahimi       LOG(ERROR) << "DFA out of memory: size " << prog_->size() << ", "
128*ccdc9c3eSSadaf Ebrahimi                  << "bytemap range " << prog_->bytemap_range() << ", "
129*ccdc9c3eSSadaf Ebrahimi                  << "list count " << prog_->list_count();
130*ccdc9c3eSSadaf Ebrahimi     if (error_info != NULL)
131*ccdc9c3eSSadaf Ebrahimi       error_info->kind = kOutOfMemory;
132*ccdc9c3eSSadaf Ebrahimi     return false;
133*ccdc9c3eSSadaf Ebrahimi   }
134*ccdc9c3eSSadaf Ebrahimi   if (ret == false) {
135*ccdc9c3eSSadaf Ebrahimi     if (error_info != NULL)
136*ccdc9c3eSSadaf Ebrahimi       error_info->kind = kNoError;
137*ccdc9c3eSSadaf Ebrahimi     return false;
138*ccdc9c3eSSadaf Ebrahimi   }
139*ccdc9c3eSSadaf Ebrahimi   if (v != NULL) {
140*ccdc9c3eSSadaf Ebrahimi     if (matches->empty()) {
141*ccdc9c3eSSadaf Ebrahimi       LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned?!";
142*ccdc9c3eSSadaf Ebrahimi       if (error_info != NULL)
143*ccdc9c3eSSadaf Ebrahimi         error_info->kind = kInconsistent;
144*ccdc9c3eSSadaf Ebrahimi       return false;
145*ccdc9c3eSSadaf Ebrahimi     }
146*ccdc9c3eSSadaf Ebrahimi     v->assign(matches->begin(), matches->end());
147*ccdc9c3eSSadaf Ebrahimi   }
148*ccdc9c3eSSadaf Ebrahimi   if (error_info != NULL)
149*ccdc9c3eSSadaf Ebrahimi     error_info->kind = kNoError;
150*ccdc9c3eSSadaf Ebrahimi   return true;
151*ccdc9c3eSSadaf Ebrahimi }
152*ccdc9c3eSSadaf Ebrahimi 
153*ccdc9c3eSSadaf Ebrahimi }  // namespace re2
154