1 /* 2 * Copyright (C) 2022 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_TRACE_PROCESSOR_UTIL_GLOB_H_ 18 #define SRC_TRACE_PROCESSOR_UTIL_GLOB_H_ 19 20 #include <optional> 21 #include <string_view> 22 #include <vector> 23 24 #include "perfetto/ext/base/small_vector.h" 25 #include "perfetto/ext/base/string_splitter.h" 26 #include "perfetto/ext/base/string_view.h" 27 28 namespace perfetto { 29 namespace trace_processor { 30 namespace util { 31 32 // Lightweight implementation of matching on UNIX glob patterns, maintaining 33 // compatibility of syntax and semantics used by SQLite. 34 // 35 // Usage: 36 // GlobMatcher matcher = GlobMatcher::FromPattern("*foo*"); 37 // for (auto string : strings) { 38 // if (matcher.Matches(string)) { 39 // <do something> 40 // } 41 // } 42 // 43 // This is a class instead of a free function to allow preprocessing the 44 // pattern (e.g. to compute Kleene star offsets). This can create big savings 45 // because trace processsor needs to match the same pattern on many strings 46 // when filtering tables. 47 // 48 // Implementation: 49 // The algorithm used in this class is similar to the "alternative" 50 // algorithm proposed in [1]. 51 // 52 // We preprocess the pattern (in the constructor) to split the pattern on *, 53 // accounting for character classes. This breaks the pattern in "segments": our 54 // name for the parts of the pattern between the stars. 55 // 56 // Then at match time, we go through each segment and check if it matches part 57 // of the string. The number of character matched defines the search start-point 58 // for the next segment. As described in [1], we don't need to do any 59 // backtracking which removes the exponential component of the algorithm and 60 // consequently simplifies the code. 61 // 62 // The subtle parts are: 63 // 1) the first and last segments - they need to be "anchored" to the 64 // beginning and end of the string respectively. If not, they fail the match 65 // straight away. 66 // 2) leading/trailing stars: they counteract the above point and "unanchor" 67 // the first and last segments respectively by allowing them to happen 68 // somewhere after/before the beginning/end. 69 // 70 // [1] https://research.swtch.com/glob 71 class GlobMatcher { 72 public: 73 GlobMatcher(GlobMatcher&&) = default; 74 GlobMatcher& operator=(GlobMatcher&&) = default; 75 76 // Creates a glob matcher from a pattern. FromPattern(base::StringView pattern_str)77 static GlobMatcher FromPattern(base::StringView pattern_str) { 78 return GlobMatcher(std::move(pattern_str)); 79 } 80 81 // Checks the provided string against the pattern and returns whether it 82 // matches. 83 bool Matches(base::StringView input) const; 84 85 // Returns whether the comparison should really be an equality comparison. IsEquality()86 bool IsEquality() { 87 return !leading_star_ && !trailing_star_ && segments_.size() <= 1; 88 } 89 90 private: 91 // Represents a portion of the pattern in between two * characters. 92 struct Segment { 93 // The portion of the pattern in the segment. Note that this will not 94 // contain a free '*' (i.e. outside a character class). 95 base::StringView pattern; 96 97 // The number of consumed characters in an input string if this segment 98 // matches. 99 uint32_t matched_chars; 100 }; 101 102 // It would be very rare for a glob pattern to have more than 4 stars so 103 // reserve stack space for that many segments. 104 static constexpr uint32_t kMaxSegmentsOnStack = 4; 105 106 explicit GlobMatcher(base::StringView pattern); 107 108 GlobMatcher(const GlobMatcher&) = delete; 109 GlobMatcher& operator=(const GlobMatcher&) = delete; 110 111 // Returns whether |input| starts with the pattern in |segment| following 112 // glob matching rules. StartsWith(base::StringView input,const Segment & segment)113 bool StartsWith(base::StringView input, const Segment& segment) const { 114 if (!contains_char_class_or_question_) { 115 return input.StartsWith(segment.pattern); 116 } 117 return StartsWithSlow(input, segment); 118 } 119 120 // Returns whether |input| ends with the pattern in |segment| following 121 // glob matching rules. EndsWith(base::StringView input,const Segment & segment)122 bool EndsWith(base::StringView input, const Segment& segment) const { 123 if (!contains_char_class_or_question_) { 124 return input.EndsWith(segment.pattern); 125 } 126 // Ending with |segment| is the same as taking the substring of |in| 127 size_t start = input.size() - segment.matched_chars; 128 return StartsWithSlow(input.substr(start), segment); 129 } 130 131 // Returns the index where |input| matches the pattern in |segment| 132 // following glob matching rules or base::StringView::npos, if no such index 133 // exists. Find(base::StringView input,const Segment & segment,size_t start)134 size_t Find(base::StringView input, 135 const Segment& segment, 136 size_t start) const { 137 if (!contains_char_class_or_question_) { 138 return input.find(segment.pattern, start); 139 } 140 for (uint32_t i = 0; i < input.size(); ++i) { 141 if (StartsWithSlow(input.substr(i), segment)) { 142 return i; 143 } 144 } 145 return base::StringView::npos; 146 } 147 148 // Given a StringView starting at the boundary of a character class, returns 149 // a StringView containing only the parts inside the [] or base::StringView() 150 // if no character class exists. 151 static base::StringView ExtractCharacterClass(base::StringView input); 152 153 // Matches |in| against the given character class. 154 static bool MatchesCharacterClass(char input, base::StringView char_class); 155 156 bool StartsWithSlow(base::StringView input, const Segment& segment) const; 157 158 // IMPORTANT: this should *not* be modified after the constructor as we store 159 // pointers to the data inside here. 160 // Note: this vector also allocates space for the null-terminator so is +1 161 // the "traditional" size of the string. 162 std::vector<char> pattern_; 163 164 // Chunks of the |pattern_| tokenized on '*'. See the class comment for more 165 // info. 166 base::SmallVector<Segment, kMaxSegmentsOnStack> segments_; 167 168 bool leading_star_ = false; 169 bool trailing_star_ = false; 170 bool contains_char_class_or_question_ = false; 171 }; 172 173 } // namespace util 174 } // namespace trace_processor 175 } // namespace perfetto 176 177 #endif // SRC_TRACE_PROCESSOR_UTIL_GLOB_H_ 178