1 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
2 // -*- mode: C++ -*-
3 //
4 // Copyright 2022-2023 Google LLC
5 //
6 // Licensed under the Apache License v2.0 with LLVM Exceptions (the
7 // "License"); you may not use this file except in compliance with the
8 // License. You may obtain a copy of the License at
9 //
10 // https://llvm.org/LICENSE.txt
11 //
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
18 // Author: Giuliano Procida
19
20 #include "filter.h"
21
22 #include <fnmatch.h>
23
24 #include <cctype>
25 #include <cerrno>
26 #include <cstddef>
27 #include <cstring>
28 #include <fstream>
29 #include <memory>
30 #include <ostream>
31 #include <queue>
32 #include <sstream>
33 #include <string>
34 #include <string_view>
35 #include <unordered_set>
36 #include <utility>
37
38 #include "error.h"
39
40 namespace stg {
41 namespace {
42
43 using Items = std::unordered_set<std::string>;
44
ReadAbigail(const std::string & filename)45 Items ReadAbigail(const std::string& filename) {
46 static constexpr std::string_view kSectionSuffix = "list";
47 Items items;
48 std::ifstream file(filename);
49 Check(file.good()) << "error opening filter file '" << filename << ": "
50 << Error(errno);
51 bool in_filter_section = false;
52 std::string line;
53 while (std::getline(file, line)) {
54 size_t start = 0;
55 size_t limit = line.size();
56 // Strip leading whitespace.
57 while (start < limit && std::isspace(line[start])) {
58 ++start;
59 }
60 // Skip comment lines.
61 if (start < limit && line[start] == '#') {
62 continue;
63 }
64 // Strip trailing whitespace.
65 while (start < limit && std::isspace(line[limit - 1])) {
66 --limit;
67 }
68 // Skip empty lines.
69 if (start == limit) {
70 continue;
71 }
72 // See if we are entering a filter list section.
73 if (line[start] == '[' && line[limit - 1] == ']') {
74 const std::string_view section(&line[start + 1], limit - start - 2);
75 in_filter_section = section.ends_with(kSectionSuffix);
76 continue;
77 }
78 // Add item.
79 if (in_filter_section) {
80 items.insert(std::string(&line[start], limit - start));
81 }
82 }
83 Check(file.eof()) << "error reading filter file '" << filename << ": "
84 << Error(errno);
85 return items;
86 }
87
88 // Inverted filter.
89 class NotFilter : public Filter {
90 public:
NotFilter(std::unique_ptr<Filter> filter)91 explicit NotFilter(std::unique_ptr<Filter> filter)
92 : filter_(std::move(filter)) {}
operator ()(const std::string & item) const93 bool operator()(const std::string& item) const final {
94 return !(*filter_)(item);
95 };
96
97 private:
98 const std::unique_ptr<Filter> filter_;
99 };
100
101 // Conjunction of filters.
102 class AndFilter : public Filter {
103 public:
AndFilter(std::unique_ptr<Filter> filter1,std::unique_ptr<Filter> filter2)104 AndFilter(std::unique_ptr<Filter> filter1,
105 std::unique_ptr<Filter> filter2)
106 : filter1_(std::move(filter1)), filter2_(std::move(filter2)) {}
operator ()(const std::string & item) const107 bool operator()(const std::string& item) const final {
108 return (*filter1_)(item) && (*filter2_)(item);
109 };
110
111 private:
112 const std::unique_ptr<Filter> filter1_;
113 const std::unique_ptr<Filter> filter2_;
114 };
115
116 // Disjunction of filters.
117 class OrFilter : public Filter {
118 public:
OrFilter(std::unique_ptr<Filter> filter1,std::unique_ptr<Filter> filter2)119 OrFilter(std::unique_ptr<Filter> filter1,
120 std::unique_ptr<Filter> filter2)
121 : filter1_(std::move(filter1)), filter2_(std::move(filter2)) {}
operator ()(const std::string & item) const122 bool operator()(const std::string& item) const final {
123 return (*filter1_)(item) || (*filter2_)(item);
124 };
125
126 private:
127 const std::unique_ptr<Filter> filter1_;
128 const std::unique_ptr<Filter> filter2_;
129 };
130
131 // Glob filter.
132 class GlobFilter : public Filter {
133 public:
GlobFilter(const std::string & pattern)134 explicit GlobFilter(const std::string& pattern) : pattern_(pattern) {}
operator ()(const std::string & item) const135 bool operator()(const std::string& item) const final {
136 return fnmatch(pattern_.c_str(), item.c_str(), 0) == 0;
137 }
138
139 private:
140 const std::string pattern_;
141 };
142
143 // Literal list filter.
144 class SetFilter : public Filter {
145 public:
SetFilter(Items && items)146 explicit SetFilter(Items&& items)
147 : items_(std::move(items)) {}
operator ()(const std::string & item) const148 bool operator()(const std::string& item) const final {
149 return items_.contains(item);
150 };
151
152 private:
153 const Items items_;
154 };
155
156 const char* kTokenCharacters = ":!()&|";
157
158 // Split a filter expression into tokens.
159 //
160 // All tokens are just strings, but single characters from kTokenCharacters are
161 // recognised as special syntax. Whitespace can be used between tokens and will
162 // be ignored.
Tokenise(const std::string & filter)163 std::queue<std::string> Tokenise(const std::string& filter) {
164 std::queue<std::string> result;
165 auto it = filter.begin();
166 const auto end = filter.end();
167 while (it != end) {
168 if (std::isspace(*it)) {
169 ++it;
170 } else if (std::strchr(kTokenCharacters, *it)) {
171 result.emplace(&*it, 1);
172 ++it;
173 } else if (std::isgraph(*it)) {
174 auto name = it;
175 ++it;
176 while (it != end && std::isgraph(*it)
177 && !std::strchr(kTokenCharacters, *it)) {
178 ++it;
179 }
180 result.emplace(&*name, it - name);
181 } else {
182 Die() << "unexpected character in filter: '" << *it;
183 }
184 }
185
186 return result;
187 }
188
189 // The failing parser.
Fail(const std::string & message,std::queue<std::string> & tokens)190 std::unique_ptr<Filter> Fail(
191 const std::string& message, std::queue<std::string>& tokens) {
192 std::ostringstream os;
193 os << "syntax error in filter expression: '" << message << "'; context:";
194 for (size_t i = 0; i < 3; ++i) {
195 os << ' ';
196 if (tokens.empty()) {
197 os << "<end>";
198 break;
199 }
200 os << '"' << tokens.front() << '"';
201 tokens.pop();
202 }
203 Die() << os.str();
204 }
205
206 std::unique_ptr<Filter> Expression(std::queue<std::string>& tokens);
207
208 // Parse a filter atom.
Atom(std::queue<std::string> & tokens)209 std::unique_ptr<Filter> Atom(std::queue<std::string>& tokens) {
210 if (tokens.empty()) {
211 return Fail("expected a filter expression", tokens);
212 }
213 auto token = tokens.front();
214 tokens.pop();
215 if (token == "(") {
216 auto expression = Expression(tokens);
217 if (tokens.empty() || tokens.front() != ")") {
218 return Fail("expected a ')'", tokens);
219 }
220 tokens.pop();
221 return expression;
222 } else if (token == ":") {
223 if (tokens.empty()) {
224 return Fail("expected a file name", tokens);
225 }
226 token = tokens.front();
227 tokens.pop();
228 return std::make_unique<SetFilter>(ReadAbigail(token));
229 } else {
230 if (std::strchr(kTokenCharacters, token[0])) {
231 return Fail("expected a glob token", tokens);
232 }
233 return std::make_unique<GlobFilter>(token);
234 }
235 }
236
237 // Parse a filter factor.
Factor(std::queue<std::string> & tokens)238 std::unique_ptr<Filter> Factor(std::queue<std::string>& tokens) {
239 bool invert = false;
240 while (!tokens.empty() && tokens.front() == "!") {
241 tokens.pop();
242 invert = !invert;
243 }
244 auto atom = Atom(tokens);
245 if (invert) {
246 atom = std::make_unique<NotFilter>(std::move(atom));
247 }
248 return atom;
249 }
250
251 // Parse a filter term.
Term(std::queue<std::string> & tokens)252 std::unique_ptr<Filter> Term(std::queue<std::string>& tokens) {
253 auto factor = Factor(tokens);
254 while (!tokens.empty() && tokens.front() == "&") {
255 tokens.pop();
256 factor = std::make_unique<AndFilter>(std::move(factor), Factor(tokens));
257 }
258 return factor;
259 }
260
261 // Parse a filter expression.
Expression(std::queue<std::string> & tokens)262 std::unique_ptr<Filter> Expression(std::queue<std::string>& tokens) {
263 auto term = Term(tokens);
264 while (!tokens.empty() && tokens.front() == "|") {
265 tokens.pop();
266 term = std::make_unique<OrFilter>(std::move(term), Term(tokens));
267 }
268 return term;
269 }
270
271 } // namespace
272
273 // Tokenise and parse a filter expression.
MakeFilter(const std::string & filter)274 std::unique_ptr<Filter> MakeFilter(const std::string& filter)
275 {
276 auto tokens = Tokenise(filter);
277 auto result = Expression(tokens);
278 if (!tokens.empty()) {
279 return Fail("unexpected junk at end of filter", tokens);
280 }
281 return result;
282 }
283
FilterUsage(std::ostream & os)284 void FilterUsage(std::ostream& os) {
285 os << "filter syntax:\n"
286 << " <filter> ::= <term> | <expression> '|' <term>\n"
287 << " <term> ::= <factor> | <term> '&' <factor>\n"
288 << " <factor> ::= <atom> | '!' <factor>\n"
289 << " <atom> ::= ':' <filename> | <glob> | '(' <expression> ')'\n"
290 << " <filename> ::= <string>\n"
291 << " <glob> ::= <string>\n";
292 }
293
294 } // namespace stg
295