xref: /aosp_15_r20/external/perfetto/src/trace_processor/util/glob.h (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1 /*
2  * Copyright (C) 2022 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_PROCESSOR_UTIL_GLOB_H_
18 #define SRC_TRACE_PROCESSOR_UTIL_GLOB_H_
19 
20 #include <optional>
21 #include <string_view>
22 #include <vector>
23 
24 #include "perfetto/ext/base/small_vector.h"
25 #include "perfetto/ext/base/string_splitter.h"
26 #include "perfetto/ext/base/string_view.h"
27 
28 namespace perfetto {
29 namespace trace_processor {
30 namespace util {
31 
32 // Lightweight implementation of matching on UNIX glob patterns, maintaining
33 // compatibility of syntax and semantics used by SQLite.
34 //
35 // Usage:
36 //  GlobMatcher matcher = GlobMatcher::FromPattern("*foo*");
37 //  for (auto string : strings) {
38 //    if (matcher.Matches(string)) {
39 //      <do something>
40 //    }
41 //  }
42 //
43 // This is a class instead of a free function to allow preprocessing the
44 // pattern (e.g. to compute Kleene star offsets). This can create big savings
45 // because trace processsor needs to match the same pattern on many strings
46 // when filtering tables.
47 //
48 // Implementation:
49 // The algorithm used in this class is similar to the "alternative"
50 // algorithm proposed in [1].
51 //
52 // We preprocess the pattern (in the constructor) to split the pattern on *,
53 // accounting for character classes. This breaks the pattern in "segments": our
54 // name for the parts of the pattern between the stars.
55 //
56 // Then at match time, we go through each segment and check if it matches part
57 // of the string. The number of character matched defines the search start-point
58 // for the next segment. As described in [1], we don't need to do any
59 // backtracking which removes the exponential component of the algorithm and
60 // consequently simplifies the code.
61 //
62 // The subtle parts are:
63 // 1) the first and last segments - they need to be "anchored" to the
64 //    beginning and end of the string respectively. If not, they fail the match
65 //    straight away.
66 // 2) leading/trailing stars: they counteract the above point and "unanchor"
67 //    the first and last segments respectively by allowing them to happen
68 //    somewhere after/before the beginning/end.
69 //
70 // [1] https://research.swtch.com/glob
71 class GlobMatcher {
72  public:
73   GlobMatcher(GlobMatcher&&) = default;
74   GlobMatcher& operator=(GlobMatcher&&) = default;
75 
76   // Creates a glob matcher from a pattern.
FromPattern(base::StringView pattern_str)77   static GlobMatcher FromPattern(base::StringView pattern_str) {
78     return GlobMatcher(std::move(pattern_str));
79   }
80 
81   // Checks the provided string against the pattern and returns whether it
82   // matches.
83   bool Matches(base::StringView input) const;
84 
85   // Returns whether the comparison should really be an equality comparison.
IsEquality()86   bool IsEquality() {
87     return !leading_star_ && !trailing_star_ && segments_.size() <= 1;
88   }
89 
90  private:
91   // Represents a portion of the pattern in between two * characters.
92   struct Segment {
93     // The portion of the pattern in the segment. Note that this will not
94     // contain a free '*' (i.e. outside a character class).
95     base::StringView pattern;
96 
97     // The number of consumed characters in an input string if this segment
98     // matches.
99     uint32_t matched_chars;
100   };
101 
102   // It would be very rare for a glob pattern to have more than 4 stars so
103   // reserve stack space for that many segments.
104   static constexpr uint32_t kMaxSegmentsOnStack = 4;
105 
106   explicit GlobMatcher(base::StringView pattern);
107 
108   GlobMatcher(const GlobMatcher&) = delete;
109   GlobMatcher& operator=(const GlobMatcher&) = delete;
110 
111   // Returns whether |input| starts with the pattern in |segment| following
112   // glob matching rules.
StartsWith(base::StringView input,const Segment & segment)113   bool StartsWith(base::StringView input, const Segment& segment) const {
114     if (!contains_char_class_or_question_) {
115       return input.StartsWith(segment.pattern);
116     }
117     return StartsWithSlow(input, segment);
118   }
119 
120   // Returns whether |input| ends with the pattern in |segment| following
121   // glob matching rules.
EndsWith(base::StringView input,const Segment & segment)122   bool EndsWith(base::StringView input, const Segment& segment) const {
123     if (!contains_char_class_or_question_) {
124       return input.EndsWith(segment.pattern);
125     }
126     // Ending with |segment| is the same as taking the substring of |in|
127     size_t start = input.size() - segment.matched_chars;
128     return StartsWithSlow(input.substr(start), segment);
129   }
130 
131   // Returns the index where |input| matches the pattern in |segment|
132   // following glob matching rules or base::StringView::npos, if no such index
133   // exists.
Find(base::StringView input,const Segment & segment,size_t start)134   size_t Find(base::StringView input,
135               const Segment& segment,
136               size_t start) const {
137     if (!contains_char_class_or_question_) {
138       return input.find(segment.pattern, start);
139     }
140     for (uint32_t i = 0; i < input.size(); ++i) {
141       if (StartsWithSlow(input.substr(i), segment)) {
142         return i;
143       }
144     }
145     return base::StringView::npos;
146   }
147 
148   // Given a StringView starting at the boundary of a character class, returns
149   // a StringView containing only the parts inside the [] or base::StringView()
150   // if no character class exists.
151   static base::StringView ExtractCharacterClass(base::StringView input);
152 
153   // Matches |in| against the given character class.
154   static bool MatchesCharacterClass(char input, base::StringView char_class);
155 
156   bool StartsWithSlow(base::StringView input, const Segment& segment) const;
157 
158   // IMPORTANT: this should *not* be modified after the constructor as we store
159   // pointers to the data inside here.
160   // Note: this vector also allocates space for the null-terminator so is +1
161   // the "traditional" size of the string.
162   std::vector<char> pattern_;
163 
164   // Chunks of the |pattern_| tokenized on '*'. See the class comment for more
165   // info.
166   base::SmallVector<Segment, kMaxSegmentsOnStack> segments_;
167 
168   bool leading_star_ = false;
169   bool trailing_star_ = false;
170   bool contains_char_class_or_question_ = false;
171 };
172 
173 }  // namespace util
174 }  // namespace trace_processor
175 }  // namespace perfetto
176 
177 #endif  // SRC_TRACE_PROCESSOR_UTIL_GLOB_H_
178