xref: /aosp_15_r20/external/skia/src/sksl/lex/RegexParser.cpp (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
1 /*
2  * Copyright 2017 Google Inc.
3  *
4  * Use of this source code is governed by a BSD-style license that can be
5  * found in the LICENSE file.
6  */
7 
8 #include "src/sksl/lex/RegexParser.h"
9 
10 #include "src/sksl/lex/LexUtil.h"
11 
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <utility>
15 #include <vector>
16 
parse(std::string source)17 RegexNode RegexParser::parse(std::string source) {
18     fSource = source;
19     fIndex = 0;
20     SkASSERT(fStack.size() == 0);
21     this->regex();
22     SkASSERT(fStack.size() == 1);
23     SkASSERT(fIndex == source.size());
24     return this->pop();
25 }
26 
peek()27 char RegexParser::peek() {
28     if (fIndex >= fSource.size()) {
29         return END;
30     }
31     return fSource[fIndex];
32 }
33 
expect(char c)34 void RegexParser::expect(char c) {
35     if (this->peek() != c) {
36         printf("expected '%c' at index %d, but found '%c'", c, (int) fIndex, this->peek());
37         exit(1);
38     }
39     ++fIndex;
40 }
41 
pop()42 RegexNode RegexParser::pop() {
43     RegexNode result = fStack.top();
44     fStack.pop();
45     return result;
46 }
47 
term()48 void RegexParser::term() {
49     switch (this->peek()) {
50         case '(': this->group();  break;
51         case '[': this->set();    break;
52         case '.': this->dot();    break;
53         default: this->literal(); break;
54     }
55 }
56 
quantifiedTerm()57 void RegexParser::quantifiedTerm() {
58     this->term();
59     switch (this->peek()) {
60         case '*': fStack.push(RegexNode(RegexNode::kStar_Kind,     this->pop())); ++fIndex; break;
61         case '+': fStack.push(RegexNode(RegexNode::kPlus_Kind,     this->pop())); ++fIndex; break;
62         case '?': fStack.push(RegexNode(RegexNode::kQuestion_Kind, this->pop())); ++fIndex; break;
63         default:  break;
64     }
65 }
66 
sequence()67 void RegexParser::sequence() {
68     this->quantifiedTerm();
69     for (;;) {
70         switch (this->peek()) {
71             case END: [[fallthrough]];
72             case '|': [[fallthrough]];
73             case ')': return;
74             default:
75                 this->sequence();
76                 RegexNode right = this->pop();
77                 RegexNode left = this->pop();
78                 fStack.emplace(RegexNode::kConcat_Kind, std::move(left), std::move(right));
79                 break;
80         }
81     }
82 }
83 
escapeSequence(char c)84 RegexNode RegexParser::escapeSequence(char c) {
85     switch (c) {
86         case 'n': return RegexNode(RegexNode::kChar_Kind, '\n');
87         case 'r': return RegexNode(RegexNode::kChar_Kind, '\r');
88         case 't': return RegexNode(RegexNode::kChar_Kind, '\t');
89         case 's': return RegexNode(RegexNode::kCharset_Kind, " \t\n\r");
90         default:  return RegexNode(RegexNode::kChar_Kind, c);
91     }
92 }
93 
literal()94 void RegexParser::literal() {
95     char c = this->peek();
96     if (c == '\\') {
97         ++fIndex;
98         fStack.push(this->escapeSequence(peek()));
99         ++fIndex;
100     }
101     else {
102         fStack.push(RegexNode(RegexNode::kChar_Kind, c));
103         ++fIndex;
104     }
105 }
106 
dot()107 void RegexParser::dot() {
108     this->expect('.');
109     fStack.push(RegexNode(RegexNode::kDot_Kind));
110 }
111 
group()112 void RegexParser::group() {
113     this->expect('(');
114     this->regex();
115     this->expect(')');
116 }
117 
setItem()118 void RegexParser::setItem() {
119     this->literal();
120     if (this->peek() == '-') {
121         ++fIndex;
122         if (peek() == ']') {
123             fStack.push(RegexNode(RegexNode::kChar_Kind, '-'));
124         }
125         else {
126             literal();
127             RegexNode end = this->pop();
128             SkASSERT(end.fKind == RegexNode::kChar_Kind);
129             RegexNode start = this->pop();
130             SkASSERT(start.fKind == RegexNode::kChar_Kind);
131             fStack.push(RegexNode(RegexNode::kRange_Kind, std::move(start), std::move(end)));
132         }
133     }
134 }
135 
set()136 void RegexParser::set() {
137     expect('[');
138     size_t depth = fStack.size();
139     RegexNode set(RegexNode::kCharset_Kind);
140     if (this->peek() == '^') {
141         ++fIndex;
142         set.fPayload.fBool = true;
143     }
144     else {
145         set.fPayload.fBool = false;
146     }
147     for (;;) {
148         switch (this->peek()) {
149             case ']':
150                 ++fIndex;
151                 while (fStack.size() > depth) {
152                     set.fChildren.push_back(this->pop());
153                 }
154                 fStack.push(std::move(set));
155                 return;
156             case END:
157                 printf("unterminated character set\n");
158                 exit(1);
159             default:
160                 this->setItem();
161                 break;
162         }
163     }
164 }
165 
regex()166 void RegexParser::regex() {
167     this->sequence();
168     switch (this->peek()) {
169         case '|': {
170             ++fIndex;
171             this->regex();
172             RegexNode right = this->pop();
173             RegexNode left = this->pop();
174             fStack.push(RegexNode(RegexNode::kOr_Kind, left, right));
175             break;
176         }
177         case END: // fall through
178         case ')':
179             return;
180         default:
181             SkASSERT(false);
182     }
183 }
184