1*635a8641SAndroid Build Coastguard Worker // Copyright 2015 The Chromium Authors. All rights reserved.
2*635a8641SAndroid Build Coastguard Worker // Use of this source code is governed by a BSD-style license that can be
3*635a8641SAndroid Build Coastguard Worker // found in the LICENSE file.
4*635a8641SAndroid Build Coastguard Worker
5*635a8641SAndroid Build Coastguard Worker #include "base/strings/pattern.h"
6*635a8641SAndroid Build Coastguard Worker
7*635a8641SAndroid Build Coastguard Worker #include "base/third_party/icu/icu_utf.h"
8*635a8641SAndroid Build Coastguard Worker
9*635a8641SAndroid Build Coastguard Worker namespace base {
10*635a8641SAndroid Build Coastguard Worker
11*635a8641SAndroid Build Coastguard Worker namespace {
12*635a8641SAndroid Build Coastguard Worker
IsWildcard(base_icu::UChar32 character)13*635a8641SAndroid Build Coastguard Worker constexpr bool IsWildcard(base_icu::UChar32 character) {
14*635a8641SAndroid Build Coastguard Worker return character == '*' || character == '?';
15*635a8641SAndroid Build Coastguard Worker }
16*635a8641SAndroid Build Coastguard Worker
17*635a8641SAndroid Build Coastguard Worker // Searches for the next subpattern of |pattern| in |string|, up to the given
18*635a8641SAndroid Build Coastguard Worker // |maximum_distance|. The subpattern extends from the start of |pattern| up to
19*635a8641SAndroid Build Coastguard Worker // the first wildcard character (or the end of the string). If the value of
20*635a8641SAndroid Build Coastguard Worker // |maximum_distance| is negative, the maximum distance is considered infinite.
21*635a8641SAndroid Build Coastguard Worker template <typename CHAR, typename NEXT>
SearchForChars(const CHAR ** pattern,const CHAR * pattern_end,const CHAR ** string,const CHAR * string_end,int maximum_distance,NEXT next)22*635a8641SAndroid Build Coastguard Worker constexpr bool SearchForChars(const CHAR** pattern,
23*635a8641SAndroid Build Coastguard Worker const CHAR* pattern_end,
24*635a8641SAndroid Build Coastguard Worker const CHAR** string,
25*635a8641SAndroid Build Coastguard Worker const CHAR* string_end,
26*635a8641SAndroid Build Coastguard Worker int maximum_distance,
27*635a8641SAndroid Build Coastguard Worker NEXT next) {
28*635a8641SAndroid Build Coastguard Worker const CHAR* pattern_start = *pattern;
29*635a8641SAndroid Build Coastguard Worker const CHAR* string_start = *string;
30*635a8641SAndroid Build Coastguard Worker bool escape = false;
31*635a8641SAndroid Build Coastguard Worker while (true) {
32*635a8641SAndroid Build Coastguard Worker if (*pattern == pattern_end) {
33*635a8641SAndroid Build Coastguard Worker // If this is the end of the pattern, only accept the end of the string;
34*635a8641SAndroid Build Coastguard Worker // anything else falls through to the mismatch case.
35*635a8641SAndroid Build Coastguard Worker if (*string == string_end)
36*635a8641SAndroid Build Coastguard Worker return true;
37*635a8641SAndroid Build Coastguard Worker } else {
38*635a8641SAndroid Build Coastguard Worker // If we have found a wildcard, we're done.
39*635a8641SAndroid Build Coastguard Worker if (!escape && IsWildcard(**pattern))
40*635a8641SAndroid Build Coastguard Worker return true;
41*635a8641SAndroid Build Coastguard Worker
42*635a8641SAndroid Build Coastguard Worker // Check if the escape character is found. If so, skip it and move to the
43*635a8641SAndroid Build Coastguard Worker // next character.
44*635a8641SAndroid Build Coastguard Worker if (!escape && **pattern == '\\') {
45*635a8641SAndroid Build Coastguard Worker escape = true;
46*635a8641SAndroid Build Coastguard Worker next(pattern, pattern_end);
47*635a8641SAndroid Build Coastguard Worker continue;
48*635a8641SAndroid Build Coastguard Worker }
49*635a8641SAndroid Build Coastguard Worker
50*635a8641SAndroid Build Coastguard Worker escape = false;
51*635a8641SAndroid Build Coastguard Worker
52*635a8641SAndroid Build Coastguard Worker if (*string == string_end)
53*635a8641SAndroid Build Coastguard Worker return false;
54*635a8641SAndroid Build Coastguard Worker
55*635a8641SAndroid Build Coastguard Worker // Check if the chars match, if so, increment the ptrs.
56*635a8641SAndroid Build Coastguard Worker const CHAR* pattern_next = *pattern;
57*635a8641SAndroid Build Coastguard Worker const CHAR* string_next = *string;
58*635a8641SAndroid Build Coastguard Worker base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
59*635a8641SAndroid Build Coastguard Worker if (pattern_char == next(&string_next, string_end) &&
60*635a8641SAndroid Build Coastguard Worker pattern_char != CBU_SENTINEL) {
61*635a8641SAndroid Build Coastguard Worker *pattern = pattern_next;
62*635a8641SAndroid Build Coastguard Worker *string = string_next;
63*635a8641SAndroid Build Coastguard Worker continue;
64*635a8641SAndroid Build Coastguard Worker }
65*635a8641SAndroid Build Coastguard Worker }
66*635a8641SAndroid Build Coastguard Worker
67*635a8641SAndroid Build Coastguard Worker // Mismatch. If we have reached the maximum distance, return false,
68*635a8641SAndroid Build Coastguard Worker // otherwise restart at the beginning of the pattern with the next character
69*635a8641SAndroid Build Coastguard Worker // in the string.
70*635a8641SAndroid Build Coastguard Worker // TODO(bauerb): This is a naive implementation of substring search, which
71*635a8641SAndroid Build Coastguard Worker // could be implemented with a more efficient algorithm, e.g.
72*635a8641SAndroid Build Coastguard Worker // Knuth-Morris-Pratt (at the expense of requiring preprocessing).
73*635a8641SAndroid Build Coastguard Worker if (maximum_distance == 0)
74*635a8641SAndroid Build Coastguard Worker return false;
75*635a8641SAndroid Build Coastguard Worker
76*635a8641SAndroid Build Coastguard Worker // Because unlimited distance is represented as -1, this will never reach 0
77*635a8641SAndroid Build Coastguard Worker // and therefore fail the match above.
78*635a8641SAndroid Build Coastguard Worker maximum_distance--;
79*635a8641SAndroid Build Coastguard Worker *pattern = pattern_start;
80*635a8641SAndroid Build Coastguard Worker next(&string_start, string_end);
81*635a8641SAndroid Build Coastguard Worker *string = string_start;
82*635a8641SAndroid Build Coastguard Worker }
83*635a8641SAndroid Build Coastguard Worker }
84*635a8641SAndroid Build Coastguard Worker
85*635a8641SAndroid Build Coastguard Worker // Consumes consecutive wildcard characters (? or *). Returns the maximum number
86*635a8641SAndroid Build Coastguard Worker // of characters matched by the sequence of wildcards, or -1 if the wildcards
87*635a8641SAndroid Build Coastguard Worker // match an arbitrary number of characters (which is the case if it contains at
88*635a8641SAndroid Build Coastguard Worker // least one *).
89*635a8641SAndroid Build Coastguard Worker template <typename CHAR, typename NEXT>
EatWildcards(const CHAR ** pattern,const CHAR * end,NEXT next)90*635a8641SAndroid Build Coastguard Worker constexpr int EatWildcards(const CHAR** pattern, const CHAR* end, NEXT next) {
91*635a8641SAndroid Build Coastguard Worker int num_question_marks = 0;
92*635a8641SAndroid Build Coastguard Worker bool has_asterisk = false;
93*635a8641SAndroid Build Coastguard Worker while (*pattern != end) {
94*635a8641SAndroid Build Coastguard Worker if (**pattern == '?') {
95*635a8641SAndroid Build Coastguard Worker num_question_marks++;
96*635a8641SAndroid Build Coastguard Worker } else if (**pattern == '*') {
97*635a8641SAndroid Build Coastguard Worker has_asterisk = true;
98*635a8641SAndroid Build Coastguard Worker } else {
99*635a8641SAndroid Build Coastguard Worker break;
100*635a8641SAndroid Build Coastguard Worker }
101*635a8641SAndroid Build Coastguard Worker
102*635a8641SAndroid Build Coastguard Worker next(pattern, end);
103*635a8641SAndroid Build Coastguard Worker }
104*635a8641SAndroid Build Coastguard Worker return has_asterisk ? -1 : num_question_marks;
105*635a8641SAndroid Build Coastguard Worker }
106*635a8641SAndroid Build Coastguard Worker
107*635a8641SAndroid Build Coastguard Worker template <typename CHAR, typename NEXT>
MatchPatternT(const CHAR * eval,const CHAR * eval_end,const CHAR * pattern,const CHAR * pattern_end,NEXT next)108*635a8641SAndroid Build Coastguard Worker constexpr bool MatchPatternT(const CHAR* eval,
109*635a8641SAndroid Build Coastguard Worker const CHAR* eval_end,
110*635a8641SAndroid Build Coastguard Worker const CHAR* pattern,
111*635a8641SAndroid Build Coastguard Worker const CHAR* pattern_end,
112*635a8641SAndroid Build Coastguard Worker NEXT next) {
113*635a8641SAndroid Build Coastguard Worker do {
114*635a8641SAndroid Build Coastguard Worker int maximum_wildcard_length = EatWildcards(&pattern, pattern_end, next);
115*635a8641SAndroid Build Coastguard Worker if (!SearchForChars(&pattern, pattern_end, &eval, eval_end,
116*635a8641SAndroid Build Coastguard Worker maximum_wildcard_length, next)) {
117*635a8641SAndroid Build Coastguard Worker return false;
118*635a8641SAndroid Build Coastguard Worker }
119*635a8641SAndroid Build Coastguard Worker } while (pattern != pattern_end);
120*635a8641SAndroid Build Coastguard Worker return true;
121*635a8641SAndroid Build Coastguard Worker }
122*635a8641SAndroid Build Coastguard Worker
123*635a8641SAndroid Build Coastguard Worker struct NextCharUTF8 {
operator ()base::__anon68bd02000111::NextCharUTF8124*635a8641SAndroid Build Coastguard Worker base_icu::UChar32 operator()(const char** p, const char* end) {
125*635a8641SAndroid Build Coastguard Worker base_icu::UChar32 c;
126*635a8641SAndroid Build Coastguard Worker int offset = 0;
127*635a8641SAndroid Build Coastguard Worker CBU8_NEXT(*p, offset, end - *p, c);
128*635a8641SAndroid Build Coastguard Worker *p += offset;
129*635a8641SAndroid Build Coastguard Worker return c;
130*635a8641SAndroid Build Coastguard Worker }
131*635a8641SAndroid Build Coastguard Worker };
132*635a8641SAndroid Build Coastguard Worker
133*635a8641SAndroid Build Coastguard Worker struct NextCharUTF16 {
operator ()base::__anon68bd02000111::NextCharUTF16134*635a8641SAndroid Build Coastguard Worker base_icu::UChar32 operator()(const char16** p, const char16* end) {
135*635a8641SAndroid Build Coastguard Worker base_icu::UChar32 c;
136*635a8641SAndroid Build Coastguard Worker int offset = 0;
137*635a8641SAndroid Build Coastguard Worker CBU16_NEXT(*p, offset, end - *p, c);
138*635a8641SAndroid Build Coastguard Worker *p += offset;
139*635a8641SAndroid Build Coastguard Worker return c;
140*635a8641SAndroid Build Coastguard Worker }
141*635a8641SAndroid Build Coastguard Worker };
142*635a8641SAndroid Build Coastguard Worker
143*635a8641SAndroid Build Coastguard Worker } // namespace
144*635a8641SAndroid Build Coastguard Worker
MatchPattern(StringPiece eval,StringPiece pattern)145*635a8641SAndroid Build Coastguard Worker bool MatchPattern(StringPiece eval, StringPiece pattern) {
146*635a8641SAndroid Build Coastguard Worker return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(),
147*635a8641SAndroid Build Coastguard Worker pattern.data() + pattern.size(), NextCharUTF8());
148*635a8641SAndroid Build Coastguard Worker }
149*635a8641SAndroid Build Coastguard Worker
MatchPattern(StringPiece16 eval,StringPiece16 pattern)150*635a8641SAndroid Build Coastguard Worker bool MatchPattern(StringPiece16 eval, StringPiece16 pattern) {
151*635a8641SAndroid Build Coastguard Worker return MatchPatternT(eval.data(), eval.data() + eval.size(), pattern.data(),
152*635a8641SAndroid Build Coastguard Worker pattern.data() + pattern.size(), NextCharUTF16());
153*635a8641SAndroid Build Coastguard Worker }
154*635a8641SAndroid Build Coastguard Worker
155*635a8641SAndroid Build Coastguard Worker } // namespace base
156