xref: /aosp_15_r20/external/cronet/base/strings/pattern.cc (revision 6777b5387eb2ff775bb5750e3f5d96f37fb7352b)
1*6777b538SAndroid Build Coastguard Worker // Copyright 2015 The Chromium Authors
2*6777b538SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*6777b538SAndroid Build Coastguard Worker // found in the LICENSE file.
4*6777b538SAndroid Build Coastguard Worker 
5*6777b538SAndroid Build Coastguard Worker #include "base/strings/pattern.h"
6*6777b538SAndroid Build Coastguard Worker 
7*6777b538SAndroid Build Coastguard Worker #include "base/third_party/icu/icu_utf.h"
8*6777b538SAndroid Build Coastguard Worker 
9*6777b538SAndroid Build Coastguard Worker namespace base {
10*6777b538SAndroid Build Coastguard Worker 
11*6777b538SAndroid Build Coastguard Worker namespace {
12*6777b538SAndroid Build Coastguard Worker 
IsWildcard(base_icu::UChar32 character)13*6777b538SAndroid Build Coastguard Worker constexpr bool IsWildcard(base_icu::UChar32 character) {
14*6777b538SAndroid Build Coastguard Worker   return character == '*' || character == '?';
15*6777b538SAndroid Build Coastguard Worker }
16*6777b538SAndroid Build Coastguard Worker 
17*6777b538SAndroid Build Coastguard Worker // Searches for the next subpattern of |pattern| in |string|, up to the given
18*6777b538SAndroid Build Coastguard Worker // |maximum_distance|. The subpattern extends from the start of |pattern| up to
19*6777b538SAndroid Build Coastguard Worker // the first wildcard character (or the end of the string). If the value of
20*6777b538SAndroid Build Coastguard Worker // |maximum_distance| is negative, the maximum distance is considered infinite.
21*6777b538SAndroid Build Coastguard Worker template <typename CHAR, typename NEXT>
SearchForChars(const CHAR ** pattern,const CHAR * pattern_end,const CHAR ** string,const CHAR * string_end,int maximum_distance,NEXT next)22*6777b538SAndroid Build Coastguard Worker constexpr bool SearchForChars(const CHAR** pattern,
23*6777b538SAndroid Build Coastguard Worker                               const CHAR* pattern_end,
24*6777b538SAndroid Build Coastguard Worker                               const CHAR** string,
25*6777b538SAndroid Build Coastguard Worker                               const CHAR* string_end,
26*6777b538SAndroid Build Coastguard Worker                               int maximum_distance,
27*6777b538SAndroid Build Coastguard Worker                               NEXT next) {
28*6777b538SAndroid Build Coastguard Worker   const CHAR* pattern_start = *pattern;
29*6777b538SAndroid Build Coastguard Worker   const CHAR* string_start = *string;
30*6777b538SAndroid Build Coastguard Worker   bool escape = false;
31*6777b538SAndroid Build Coastguard Worker   while (true) {
32*6777b538SAndroid Build Coastguard Worker     if (*pattern == pattern_end) {
33*6777b538SAndroid Build Coastguard Worker       // If this is the end of the pattern, only accept the end of the string;
34*6777b538SAndroid Build Coastguard Worker       // anything else falls through to the mismatch case.
35*6777b538SAndroid Build Coastguard Worker       if (*string == string_end)
36*6777b538SAndroid Build Coastguard Worker         return true;
37*6777b538SAndroid Build Coastguard Worker     } else {
38*6777b538SAndroid Build Coastguard Worker       // If we have found a wildcard, we're done.
39*6777b538SAndroid Build Coastguard Worker       if (!escape && IsWildcard(**pattern))
40*6777b538SAndroid Build Coastguard Worker         return true;
41*6777b538SAndroid Build Coastguard Worker 
42*6777b538SAndroid Build Coastguard Worker       // Check if the escape character is found. If so, skip it and move to the
43*6777b538SAndroid Build Coastguard Worker       // next character.
44*6777b538SAndroid Build Coastguard Worker       if (!escape && **pattern == '\\') {
45*6777b538SAndroid Build Coastguard Worker         escape = true;
46*6777b538SAndroid Build Coastguard Worker         next(pattern, pattern_end);
47*6777b538SAndroid Build Coastguard Worker         continue;
48*6777b538SAndroid Build Coastguard Worker       }
49*6777b538SAndroid Build Coastguard Worker 
50*6777b538SAndroid Build Coastguard Worker       escape = false;
51*6777b538SAndroid Build Coastguard Worker 
52*6777b538SAndroid Build Coastguard Worker       if (*string == string_end)
53*6777b538SAndroid Build Coastguard Worker         return false;
54*6777b538SAndroid Build Coastguard Worker 
55*6777b538SAndroid Build Coastguard Worker       // Check if the chars match, if so, increment the ptrs.
56*6777b538SAndroid Build Coastguard Worker       const CHAR* pattern_next = *pattern;
57*6777b538SAndroid Build Coastguard Worker       const CHAR* string_next = *string;
58*6777b538SAndroid Build Coastguard Worker       base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
59*6777b538SAndroid Build Coastguard Worker       if (pattern_char == next(&string_next, string_end) &&
60*6777b538SAndroid Build Coastguard Worker           pattern_char != CBU_SENTINEL) {
61*6777b538SAndroid Build Coastguard Worker         *pattern = pattern_next;
62*6777b538SAndroid Build Coastguard Worker         *string = string_next;
63*6777b538SAndroid Build Coastguard Worker         continue;
64*6777b538SAndroid Build Coastguard Worker       }
65*6777b538SAndroid Build Coastguard Worker     }
66*6777b538SAndroid Build Coastguard Worker 
67*6777b538SAndroid Build Coastguard Worker     // Mismatch. If we have reached the maximum distance, return false,
68*6777b538SAndroid Build Coastguard Worker     // otherwise restart at the beginning of the pattern with the next character
69*6777b538SAndroid Build Coastguard Worker     // in the string.
70*6777b538SAndroid Build Coastguard Worker     // TODO(bauerb): This is a naive implementation of substring search, which
71*6777b538SAndroid Build Coastguard Worker     // could be implemented with a more efficient algorithm, e.g.
72*6777b538SAndroid Build Coastguard Worker     // Knuth-Morris-Pratt (at the expense of requiring preprocessing).
73*6777b538SAndroid Build Coastguard Worker     if (maximum_distance == 0)
74*6777b538SAndroid Build Coastguard Worker       return false;
75*6777b538SAndroid Build Coastguard Worker 
76*6777b538SAndroid Build Coastguard Worker     // Because unlimited distance is represented as -1, this will never reach 0
77*6777b538SAndroid Build Coastguard Worker     // and therefore fail the match above.
78*6777b538SAndroid Build Coastguard Worker     maximum_distance--;
79*6777b538SAndroid Build Coastguard Worker     *pattern = pattern_start;
80*6777b538SAndroid Build Coastguard Worker     next(&string_start, string_end);
81*6777b538SAndroid Build Coastguard Worker     *string = string_start;
82*6777b538SAndroid Build Coastguard Worker   }
83*6777b538SAndroid Build Coastguard Worker }
84*6777b538SAndroid Build Coastguard Worker 
85*6777b538SAndroid Build Coastguard Worker // Consumes consecutive wildcard characters (? or *). Returns the maximum number
86*6777b538SAndroid Build Coastguard Worker // of characters matched by the sequence of wildcards, or -1 if the wildcards
87*6777b538SAndroid Build Coastguard Worker // match an arbitrary number of characters (which is the case if it contains at
88*6777b538SAndroid Build Coastguard Worker // least one *).
89*6777b538SAndroid Build Coastguard Worker template <typename CHAR, typename NEXT>
EatWildcards(const CHAR ** pattern,const CHAR * end,NEXT next)90*6777b538SAndroid Build Coastguard Worker constexpr int EatWildcards(const CHAR** pattern, const CHAR* end, NEXT next) {
91*6777b538SAndroid Build Coastguard Worker   int num_question_marks = 0;
92*6777b538SAndroid Build Coastguard Worker   bool has_asterisk = false;
93*6777b538SAndroid Build Coastguard Worker   while (*pattern != end) {
94*6777b538SAndroid Build Coastguard Worker     if (**pattern == '?') {
95*6777b538SAndroid Build Coastguard Worker       num_question_marks++;
96*6777b538SAndroid Build Coastguard Worker     } else if (**pattern == '*') {
97*6777b538SAndroid Build Coastguard Worker       has_asterisk = true;
98*6777b538SAndroid Build Coastguard Worker     } else {
99*6777b538SAndroid Build Coastguard Worker       break;
100*6777b538SAndroid Build Coastguard Worker     }
101*6777b538SAndroid Build Coastguard Worker 
102*6777b538SAndroid Build Coastguard Worker     next(pattern, end);
103*6777b538SAndroid Build Coastguard Worker   }
104*6777b538SAndroid Build Coastguard Worker   return has_asterisk ? -1 : num_question_marks;
105*6777b538SAndroid Build Coastguard Worker }
106*6777b538SAndroid Build Coastguard Worker 
107*6777b538SAndroid Build Coastguard Worker template <typename CHAR, typename NEXT>
MatchPatternT(const CHAR * eval,const CHAR * eval_end,const CHAR * pattern,const CHAR * pattern_end,NEXT next)108*6777b538SAndroid Build Coastguard Worker constexpr bool MatchPatternT(const CHAR* eval,
109*6777b538SAndroid Build Coastguard Worker                              const CHAR* eval_end,
110*6777b538SAndroid Build Coastguard Worker                              const CHAR* pattern,
111*6777b538SAndroid Build Coastguard Worker                              const CHAR* pattern_end,
112*6777b538SAndroid Build Coastguard Worker                              NEXT next) {
113*6777b538SAndroid Build Coastguard Worker   do {
114*6777b538SAndroid Build Coastguard Worker     int maximum_wildcard_length = EatWildcards(&pattern, pattern_end, next);
115*6777b538SAndroid Build Coastguard Worker     if (!SearchForChars(&pattern, pattern_end, &eval, eval_end,
116*6777b538SAndroid Build Coastguard Worker                         maximum_wildcard_length, next)) {
117*6777b538SAndroid Build Coastguard Worker       return false;
118*6777b538SAndroid Build Coastguard Worker     }
119*6777b538SAndroid Build Coastguard Worker   } while (pattern != pattern_end);
120*6777b538SAndroid Build Coastguard Worker   return true;
121*6777b538SAndroid Build Coastguard Worker }
122*6777b538SAndroid Build Coastguard Worker 
123*6777b538SAndroid Build Coastguard Worker struct NextCharUTF8 {
operator ()base::__anonecfcb9760111::NextCharUTF8124*6777b538SAndroid Build Coastguard Worker   base_icu::UChar32 operator()(const char** p, const char* end) {
125*6777b538SAndroid Build Coastguard Worker     base_icu::UChar32 c;
126*6777b538SAndroid Build Coastguard Worker     int offset = 0;
127*6777b538SAndroid Build Coastguard Worker     CBU8_NEXT(reinterpret_cast<const uint8_t*>(*p), offset, end - *p, c);
128*6777b538SAndroid Build Coastguard Worker     *p += offset;
129*6777b538SAndroid Build Coastguard Worker     return c;
130*6777b538SAndroid Build Coastguard Worker   }
131*6777b538SAndroid Build Coastguard Worker };
132*6777b538SAndroid Build Coastguard Worker 
133*6777b538SAndroid Build Coastguard Worker struct NextCharUTF16 {
operator ()base::__anonecfcb9760111::NextCharUTF16134*6777b538SAndroid Build Coastguard Worker   base_icu::UChar32 operator()(const char16_t** p, const char16_t* end) {
135*6777b538SAndroid Build Coastguard Worker     base_icu::UChar32 c;
136*6777b538SAndroid Build Coastguard Worker     int offset = 0;
137*6777b538SAndroid Build Coastguard Worker     CBU16_NEXT(*p, offset, end - *p, c);
138*6777b538SAndroid Build Coastguard Worker     *p += offset;
139*6777b538SAndroid Build Coastguard Worker     return c;
140*6777b538SAndroid Build Coastguard Worker   }
141*6777b538SAndroid Build Coastguard Worker };
142*6777b538SAndroid Build Coastguard Worker 
143*6777b538SAndroid Build Coastguard Worker }  // namespace
144*6777b538SAndroid Build Coastguard Worker 
MatchPattern(StringPiece eval,StringPiece pattern)145*6777b538SAndroid Build Coastguard Worker bool MatchPattern(StringPiece eval, StringPiece pattern) {
146*6777b538SAndroid Build Coastguard Worker   return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(),
147*6777b538SAndroid Build Coastguard Worker                        pattern.data() + pattern.size(), NextCharUTF8());
148*6777b538SAndroid Build Coastguard Worker }
149*6777b538SAndroid Build Coastguard Worker 
MatchPattern(StringPiece16 eval,StringPiece16 pattern)150*6777b538SAndroid Build Coastguard Worker bool MatchPattern(StringPiece16 eval, StringPiece16 pattern) {
151*6777b538SAndroid Build Coastguard Worker   return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(),
152*6777b538SAndroid Build Coastguard Worker                        pattern.data() + pattern.size(), NextCharUTF16());
153*6777b538SAndroid Build Coastguard Worker }
154*6777b538SAndroid Build Coastguard Worker 
155*6777b538SAndroid Build Coastguard Worker }  // namespace base
156