xref: /aosp_15_r20/external/cronet/third_party/re2/src/re2/set.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1 // Copyright 2010 The RE2 Authors.  All Rights Reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4 
5 #include "re2/set.h"
6 
7 #include <stddef.h>
8 #include <algorithm>
9 #include <memory>
10 #include <utility>
11 
12 #include "util/logging.h"
13 #include "re2/pod_array.h"
14 #include "re2/prog.h"
15 #include "re2/re2.h"
16 #include "re2/regexp.h"
17 
18 namespace re2 {
19 
Set(const RE2::Options & options,RE2::Anchor anchor)20 RE2::Set::Set(const RE2::Options& options, RE2::Anchor anchor)
21     : options_(options),
22       anchor_(anchor),
23       compiled_(false),
24       size_(0) {
25   options_.set_never_capture(true);  // might unblock some optimisations
26 }
27 
~Set()28 RE2::Set::~Set() {
29   for (size_t i = 0; i < elem_.size(); i++)
30     elem_[i].second->Decref();
31 }
32 
Set(Set && other)33 RE2::Set::Set(Set&& other)
34     : options_(other.options_),
35       anchor_(other.anchor_),
36       elem_(std::move(other.elem_)),
37       compiled_(other.compiled_),
38       size_(other.size_),
39       prog_(std::move(other.prog_)) {
40   other.elem_.clear();
41   other.elem_.shrink_to_fit();
42   other.compiled_ = false;
43   other.size_ = 0;
44   other.prog_.reset();
45 }
46 
operator =(Set && other)47 RE2::Set& RE2::Set::operator=(Set&& other) {
48   this->~Set();
49   (void) new (this) Set(std::move(other));
50   return *this;
51 }
52 
Add(absl::string_view pattern,std::string * error)53 int RE2::Set::Add(absl::string_view pattern, std::string* error) {
54   if (compiled_) {
55     LOG(DFATAL) << "RE2::Set::Add() called after compiling";
56     return -1;
57   }
58 
59   Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
60     options_.ParseFlags());
61   RegexpStatus status;
62   re2::Regexp* re = Regexp::Parse(pattern, pf, &status);
63   if (re == NULL) {
64     if (error != NULL)
65       *error = status.Text();
66     if (options_.log_errors())
67       LOG(ERROR) << "Error parsing '" << pattern << "': " << status.Text();
68     return -1;
69   }
70 
71   // Concatenate with match index and push on vector.
72   int n = static_cast<int>(elem_.size());
73   re2::Regexp* m = re2::Regexp::HaveMatch(n, pf);
74   if (re->op() == kRegexpConcat) {
75     int nsub = re->nsub();
76     PODArray<re2::Regexp*> sub(nsub + 1);
77     for (int i = 0; i < nsub; i++)
78       sub[i] = re->sub()[i]->Incref();
79     sub[nsub] = m;
80     re->Decref();
81     re = re2::Regexp::Concat(sub.data(), nsub + 1, pf);
82   } else {
83     re2::Regexp* sub[2];
84     sub[0] = re;
85     sub[1] = m;
86     re = re2::Regexp::Concat(sub, 2, pf);
87   }
88   elem_.emplace_back(std::string(pattern), re);
89   return n;
90 }
91 
Compile()92 bool RE2::Set::Compile() {
93   if (compiled_) {
94     LOG(DFATAL) << "RE2::Set::Compile() called more than once";
95     return false;
96   }
97   compiled_ = true;
98   size_ = static_cast<int>(elem_.size());
99 
100   // Sort the elements by their patterns. This is good enough for now
101   // until we have a Regexp comparison function. (Maybe someday...)
102   std::sort(elem_.begin(), elem_.end(),
103             [](const Elem& a, const Elem& b) -> bool {
104               return a.first < b.first;
105             });
106 
107   PODArray<re2::Regexp*> sub(size_);
108   for (int i = 0; i < size_; i++)
109     sub[i] = elem_[i].second;
110   elem_.clear();
111   elem_.shrink_to_fit();
112 
113   Regexp::ParseFlags pf = static_cast<Regexp::ParseFlags>(
114     options_.ParseFlags());
115   re2::Regexp* re = re2::Regexp::Alternate(sub.data(), size_, pf);
116 
117   prog_.reset(Prog::CompileSet(re, anchor_, options_.max_mem()));
118   re->Decref();
119   return prog_ != nullptr;
120 }
121 
Match(absl::string_view text,std::vector<int> * v) const122 bool RE2::Set::Match(absl::string_view text, std::vector<int>* v) const {
123   return Match(text, v, NULL);
124 }
125 
Match(absl::string_view text,std::vector<int> * v,ErrorInfo * error_info) const126 bool RE2::Set::Match(absl::string_view text, std::vector<int>* v,
127                      ErrorInfo* error_info) const {
128   if (!compiled_) {
129     if (error_info != NULL)
130       error_info->kind = kNotCompiled;
131     LOG(DFATAL) << "RE2::Set::Match() called before compiling";
132     return false;
133   }
134 #ifdef RE2_HAVE_THREAD_LOCAL
135   hooks::context = NULL;
136 #endif
137   bool dfa_failed = false;
138   std::unique_ptr<SparseSet> matches;
139   if (v != NULL) {
140     matches.reset(new SparseSet(size_));
141     v->clear();
142   }
143   bool ret = prog_->SearchDFA(text, text, Prog::kAnchored, Prog::kManyMatch,
144                               NULL, &dfa_failed, matches.get());
145   if (dfa_failed) {
146     if (options_.log_errors())
147       LOG(ERROR) << "DFA out of memory: "
148                  << "program size " << prog_->size() << ", "
149                  << "list count " << prog_->list_count() << ", "
150                  << "bytemap range " << prog_->bytemap_range();
151     if (error_info != NULL)
152       error_info->kind = kOutOfMemory;
153     return false;
154   }
155   if (ret == false) {
156     if (error_info != NULL)
157       error_info->kind = kNoError;
158     return false;
159   }
160   if (v != NULL) {
161     if (matches->empty()) {
162       if (error_info != NULL)
163         error_info->kind = kInconsistent;
164       LOG(DFATAL) << "RE2::Set::Match() matched, but no matches returned?!";
165       return false;
166     }
167     v->assign(matches->begin(), matches->end());
168   }
169   if (error_info != NULL)
170     error_info->kind = kNoError;
171   return true;
172 }
173 
174 }  // namespace re2
175