xref: /aosp_15_r20/external/skia/src/sksl/lex/RegexParser.cpp (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
7*c8dee2aaSAndroid Build Coastguard Worker 
8*c8dee2aaSAndroid Build Coastguard Worker #include "src/sksl/lex/RegexParser.h"
9*c8dee2aaSAndroid Build Coastguard Worker 
10*c8dee2aaSAndroid Build Coastguard Worker #include "src/sksl/lex/LexUtil.h"
11*c8dee2aaSAndroid Build Coastguard Worker 
12*c8dee2aaSAndroid Build Coastguard Worker #include <stdio.h>
13*c8dee2aaSAndroid Build Coastguard Worker #include <stdlib.h>
14*c8dee2aaSAndroid Build Coastguard Worker #include <utility>
15*c8dee2aaSAndroid Build Coastguard Worker #include <vector>
16*c8dee2aaSAndroid Build Coastguard Worker 
parse(std::string source)17*c8dee2aaSAndroid Build Coastguard Worker RegexNode RegexParser::parse(std::string source) {
18*c8dee2aaSAndroid Build Coastguard Worker     fSource = source;
19*c8dee2aaSAndroid Build Coastguard Worker     fIndex = 0;
20*c8dee2aaSAndroid Build Coastguard Worker     SkASSERT(fStack.size() == 0);
21*c8dee2aaSAndroid Build Coastguard Worker     this->regex();
22*c8dee2aaSAndroid Build Coastguard Worker     SkASSERT(fStack.size() == 1);
23*c8dee2aaSAndroid Build Coastguard Worker     SkASSERT(fIndex == source.size());
24*c8dee2aaSAndroid Build Coastguard Worker     return this->pop();
25*c8dee2aaSAndroid Build Coastguard Worker }
26*c8dee2aaSAndroid Build Coastguard Worker 
peek()27*c8dee2aaSAndroid Build Coastguard Worker char RegexParser::peek() {
28*c8dee2aaSAndroid Build Coastguard Worker     if (fIndex >= fSource.size()) {
29*c8dee2aaSAndroid Build Coastguard Worker         return END;
30*c8dee2aaSAndroid Build Coastguard Worker     }
31*c8dee2aaSAndroid Build Coastguard Worker     return fSource[fIndex];
32*c8dee2aaSAndroid Build Coastguard Worker }
33*c8dee2aaSAndroid Build Coastguard Worker 
expect(char c)34*c8dee2aaSAndroid Build Coastguard Worker void RegexParser::expect(char c) {
35*c8dee2aaSAndroid Build Coastguard Worker     if (this->peek() != c) {
36*c8dee2aaSAndroid Build Coastguard Worker         printf("expected '%c' at index %d, but found '%c'", c, (int) fIndex, this->peek());
37*c8dee2aaSAndroid Build Coastguard Worker         exit(1);
38*c8dee2aaSAndroid Build Coastguard Worker     }
39*c8dee2aaSAndroid Build Coastguard Worker     ++fIndex;
40*c8dee2aaSAndroid Build Coastguard Worker }
41*c8dee2aaSAndroid Build Coastguard Worker 
pop()42*c8dee2aaSAndroid Build Coastguard Worker RegexNode RegexParser::pop() {
43*c8dee2aaSAndroid Build Coastguard Worker     RegexNode result = fStack.top();
44*c8dee2aaSAndroid Build Coastguard Worker     fStack.pop();
45*c8dee2aaSAndroid Build Coastguard Worker     return result;
46*c8dee2aaSAndroid Build Coastguard Worker }
47*c8dee2aaSAndroid Build Coastguard Worker 
term()48*c8dee2aaSAndroid Build Coastguard Worker void RegexParser::term() {
49*c8dee2aaSAndroid Build Coastguard Worker     switch (this->peek()) {
50*c8dee2aaSAndroid Build Coastguard Worker         case '(': this->group();  break;
51*c8dee2aaSAndroid Build Coastguard Worker         case '[': this->set();    break;
52*c8dee2aaSAndroid Build Coastguard Worker         case '.': this->dot();    break;
53*c8dee2aaSAndroid Build Coastguard Worker         default: this->literal(); break;
54*c8dee2aaSAndroid Build Coastguard Worker     }
55*c8dee2aaSAndroid Build Coastguard Worker }
56*c8dee2aaSAndroid Build Coastguard Worker 
quantifiedTerm()57*c8dee2aaSAndroid Build Coastguard Worker void RegexParser::quantifiedTerm() {
58*c8dee2aaSAndroid Build Coastguard Worker     this->term();
59*c8dee2aaSAndroid Build Coastguard Worker     switch (this->peek()) {
60*c8dee2aaSAndroid Build Coastguard Worker         case '*': fStack.push(RegexNode(RegexNode::kStar_Kind,     this->pop())); ++fIndex; break;
61*c8dee2aaSAndroid Build Coastguard Worker         case '+': fStack.push(RegexNode(RegexNode::kPlus_Kind,     this->pop())); ++fIndex; break;
62*c8dee2aaSAndroid Build Coastguard Worker         case '?': fStack.push(RegexNode(RegexNode::kQuestion_Kind, this->pop())); ++fIndex; break;
63*c8dee2aaSAndroid Build Coastguard Worker         default:  break;
64*c8dee2aaSAndroid Build Coastguard Worker     }
65*c8dee2aaSAndroid Build Coastguard Worker }
66*c8dee2aaSAndroid Build Coastguard Worker 
sequence()67*c8dee2aaSAndroid Build Coastguard Worker void RegexParser::sequence() {
68*c8dee2aaSAndroid Build Coastguard Worker     this->quantifiedTerm();
69*c8dee2aaSAndroid Build Coastguard Worker     for (;;) {
70*c8dee2aaSAndroid Build Coastguard Worker         switch (this->peek()) {
71*c8dee2aaSAndroid Build Coastguard Worker             case END: [[fallthrough]];
72*c8dee2aaSAndroid Build Coastguard Worker             case '|': [[fallthrough]];
73*c8dee2aaSAndroid Build Coastguard Worker             case ')': return;
74*c8dee2aaSAndroid Build Coastguard Worker             default:
75*c8dee2aaSAndroid Build Coastguard Worker                 this->sequence();
76*c8dee2aaSAndroid Build Coastguard Worker                 RegexNode right = this->pop();
77*c8dee2aaSAndroid Build Coastguard Worker                 RegexNode left = this->pop();
78*c8dee2aaSAndroid Build Coastguard Worker                 fStack.emplace(RegexNode::kConcat_Kind, std::move(left), std::move(right));
79*c8dee2aaSAndroid Build Coastguard Worker                 break;
80*c8dee2aaSAndroid Build Coastguard Worker         }
81*c8dee2aaSAndroid Build Coastguard Worker     }
82*c8dee2aaSAndroid Build Coastguard Worker }
83*c8dee2aaSAndroid Build Coastguard Worker 
escapeSequence(char c)84*c8dee2aaSAndroid Build Coastguard Worker RegexNode RegexParser::escapeSequence(char c) {
85*c8dee2aaSAndroid Build Coastguard Worker     switch (c) {
86*c8dee2aaSAndroid Build Coastguard Worker         case 'n': return RegexNode(RegexNode::kChar_Kind, '\n');
87*c8dee2aaSAndroid Build Coastguard Worker         case 'r': return RegexNode(RegexNode::kChar_Kind, '\r');
88*c8dee2aaSAndroid Build Coastguard Worker         case 't': return RegexNode(RegexNode::kChar_Kind, '\t');
89*c8dee2aaSAndroid Build Coastguard Worker         case 's': return RegexNode(RegexNode::kCharset_Kind, " \t\n\r");
90*c8dee2aaSAndroid Build Coastguard Worker         default:  return RegexNode(RegexNode::kChar_Kind, c);
91*c8dee2aaSAndroid Build Coastguard Worker     }
92*c8dee2aaSAndroid Build Coastguard Worker }
93*c8dee2aaSAndroid Build Coastguard Worker 
literal()94*c8dee2aaSAndroid Build Coastguard Worker void RegexParser::literal() {
95*c8dee2aaSAndroid Build Coastguard Worker     char c = this->peek();
96*c8dee2aaSAndroid Build Coastguard Worker     if (c == '\\') {
97*c8dee2aaSAndroid Build Coastguard Worker         ++fIndex;
98*c8dee2aaSAndroid Build Coastguard Worker         fStack.push(this->escapeSequence(peek()));
99*c8dee2aaSAndroid Build Coastguard Worker         ++fIndex;
100*c8dee2aaSAndroid Build Coastguard Worker     }
101*c8dee2aaSAndroid Build Coastguard Worker     else {
102*c8dee2aaSAndroid Build Coastguard Worker         fStack.push(RegexNode(RegexNode::kChar_Kind, c));
103*c8dee2aaSAndroid Build Coastguard Worker         ++fIndex;
104*c8dee2aaSAndroid Build Coastguard Worker     }
105*c8dee2aaSAndroid Build Coastguard Worker }
106*c8dee2aaSAndroid Build Coastguard Worker 
dot()107*c8dee2aaSAndroid Build Coastguard Worker void RegexParser::dot() {
108*c8dee2aaSAndroid Build Coastguard Worker     this->expect('.');
109*c8dee2aaSAndroid Build Coastguard Worker     fStack.push(RegexNode(RegexNode::kDot_Kind));
110*c8dee2aaSAndroid Build Coastguard Worker }
111*c8dee2aaSAndroid Build Coastguard Worker 
group()112*c8dee2aaSAndroid Build Coastguard Worker void RegexParser::group() {
113*c8dee2aaSAndroid Build Coastguard Worker     this->expect('(');
114*c8dee2aaSAndroid Build Coastguard Worker     this->regex();
115*c8dee2aaSAndroid Build Coastguard Worker     this->expect(')');
116*c8dee2aaSAndroid Build Coastguard Worker }
117*c8dee2aaSAndroid Build Coastguard Worker 
setItem()118*c8dee2aaSAndroid Build Coastguard Worker void RegexParser::setItem() {
119*c8dee2aaSAndroid Build Coastguard Worker     this->literal();
120*c8dee2aaSAndroid Build Coastguard Worker     if (this->peek() == '-') {
121*c8dee2aaSAndroid Build Coastguard Worker         ++fIndex;
122*c8dee2aaSAndroid Build Coastguard Worker         if (peek() == ']') {
123*c8dee2aaSAndroid Build Coastguard Worker             fStack.push(RegexNode(RegexNode::kChar_Kind, '-'));
124*c8dee2aaSAndroid Build Coastguard Worker         }
125*c8dee2aaSAndroid Build Coastguard Worker         else {
126*c8dee2aaSAndroid Build Coastguard Worker             literal();
127*c8dee2aaSAndroid Build Coastguard Worker             RegexNode end = this->pop();
128*c8dee2aaSAndroid Build Coastguard Worker             SkASSERT(end.fKind == RegexNode::kChar_Kind);
129*c8dee2aaSAndroid Build Coastguard Worker             RegexNode start = this->pop();
130*c8dee2aaSAndroid Build Coastguard Worker             SkASSERT(start.fKind == RegexNode::kChar_Kind);
131*c8dee2aaSAndroid Build Coastguard Worker             fStack.push(RegexNode(RegexNode::kRange_Kind, std::move(start), std::move(end)));
132*c8dee2aaSAndroid Build Coastguard Worker         }
133*c8dee2aaSAndroid Build Coastguard Worker     }
134*c8dee2aaSAndroid Build Coastguard Worker }
135*c8dee2aaSAndroid Build Coastguard Worker 
set()136*c8dee2aaSAndroid Build Coastguard Worker void RegexParser::set() {
137*c8dee2aaSAndroid Build Coastguard Worker     expect('[');
138*c8dee2aaSAndroid Build Coastguard Worker     size_t depth = fStack.size();
139*c8dee2aaSAndroid Build Coastguard Worker     RegexNode set(RegexNode::kCharset_Kind);
140*c8dee2aaSAndroid Build Coastguard Worker     if (this->peek() == '^') {
141*c8dee2aaSAndroid Build Coastguard Worker         ++fIndex;
142*c8dee2aaSAndroid Build Coastguard Worker         set.fPayload.fBool = true;
143*c8dee2aaSAndroid Build Coastguard Worker     }
144*c8dee2aaSAndroid Build Coastguard Worker     else {
145*c8dee2aaSAndroid Build Coastguard Worker         set.fPayload.fBool = false;
146*c8dee2aaSAndroid Build Coastguard Worker     }
147*c8dee2aaSAndroid Build Coastguard Worker     for (;;) {
148*c8dee2aaSAndroid Build Coastguard Worker         switch (this->peek()) {
149*c8dee2aaSAndroid Build Coastguard Worker             case ']':
150*c8dee2aaSAndroid Build Coastguard Worker                 ++fIndex;
151*c8dee2aaSAndroid Build Coastguard Worker                 while (fStack.size() > depth) {
152*c8dee2aaSAndroid Build Coastguard Worker                     set.fChildren.push_back(this->pop());
153*c8dee2aaSAndroid Build Coastguard Worker                 }
154*c8dee2aaSAndroid Build Coastguard Worker                 fStack.push(std::move(set));
155*c8dee2aaSAndroid Build Coastguard Worker                 return;
156*c8dee2aaSAndroid Build Coastguard Worker             case END:
157*c8dee2aaSAndroid Build Coastguard Worker                 printf("unterminated character set\n");
158*c8dee2aaSAndroid Build Coastguard Worker                 exit(1);
159*c8dee2aaSAndroid Build Coastguard Worker             default:
160*c8dee2aaSAndroid Build Coastguard Worker                 this->setItem();
161*c8dee2aaSAndroid Build Coastguard Worker                 break;
162*c8dee2aaSAndroid Build Coastguard Worker         }
163*c8dee2aaSAndroid Build Coastguard Worker     }
164*c8dee2aaSAndroid Build Coastguard Worker }
165*c8dee2aaSAndroid Build Coastguard Worker 
regex()166*c8dee2aaSAndroid Build Coastguard Worker void RegexParser::regex() {
167*c8dee2aaSAndroid Build Coastguard Worker     this->sequence();
168*c8dee2aaSAndroid Build Coastguard Worker     switch (this->peek()) {
169*c8dee2aaSAndroid Build Coastguard Worker         case '|': {
170*c8dee2aaSAndroid Build Coastguard Worker             ++fIndex;
171*c8dee2aaSAndroid Build Coastguard Worker             this->regex();
172*c8dee2aaSAndroid Build Coastguard Worker             RegexNode right = this->pop();
173*c8dee2aaSAndroid Build Coastguard Worker             RegexNode left = this->pop();
174*c8dee2aaSAndroid Build Coastguard Worker             fStack.push(RegexNode(RegexNode::kOr_Kind, left, right));
175*c8dee2aaSAndroid Build Coastguard Worker             break;
176*c8dee2aaSAndroid Build Coastguard Worker         }
177*c8dee2aaSAndroid Build Coastguard Worker         case END: // fall through
178*c8dee2aaSAndroid Build Coastguard Worker         case ')':
179*c8dee2aaSAndroid Build Coastguard Worker             return;
180*c8dee2aaSAndroid Build Coastguard Worker         default:
181*c8dee2aaSAndroid Build Coastguard Worker             SkASSERT(false);
182*c8dee2aaSAndroid Build Coastguard Worker     }
183*c8dee2aaSAndroid Build Coastguard Worker }
184