1 /*
2 * Copyright 2017 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "src/sksl/lex/RegexParser.h"
9
10 #include "src/sksl/lex/LexUtil.h"
11
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <utility>
15 #include <vector>
16
parse(std::string source)17 RegexNode RegexParser::parse(std::string source) {
18 fSource = source;
19 fIndex = 0;
20 SkASSERT(fStack.size() == 0);
21 this->regex();
22 SkASSERT(fStack.size() == 1);
23 SkASSERT(fIndex == source.size());
24 return this->pop();
25 }
26
peek()27 char RegexParser::peek() {
28 if (fIndex >= fSource.size()) {
29 return END;
30 }
31 return fSource[fIndex];
32 }
33
expect(char c)34 void RegexParser::expect(char c) {
35 if (this->peek() != c) {
36 printf("expected '%c' at index %d, but found '%c'", c, (int) fIndex, this->peek());
37 exit(1);
38 }
39 ++fIndex;
40 }
41
pop()42 RegexNode RegexParser::pop() {
43 RegexNode result = fStack.top();
44 fStack.pop();
45 return result;
46 }
47
term()48 void RegexParser::term() {
49 switch (this->peek()) {
50 case '(': this->group(); break;
51 case '[': this->set(); break;
52 case '.': this->dot(); break;
53 default: this->literal(); break;
54 }
55 }
56
quantifiedTerm()57 void RegexParser::quantifiedTerm() {
58 this->term();
59 switch (this->peek()) {
60 case '*': fStack.push(RegexNode(RegexNode::kStar_Kind, this->pop())); ++fIndex; break;
61 case '+': fStack.push(RegexNode(RegexNode::kPlus_Kind, this->pop())); ++fIndex; break;
62 case '?': fStack.push(RegexNode(RegexNode::kQuestion_Kind, this->pop())); ++fIndex; break;
63 default: break;
64 }
65 }
66
sequence()67 void RegexParser::sequence() {
68 this->quantifiedTerm();
69 for (;;) {
70 switch (this->peek()) {
71 case END: [[fallthrough]];
72 case '|': [[fallthrough]];
73 case ')': return;
74 default:
75 this->sequence();
76 RegexNode right = this->pop();
77 RegexNode left = this->pop();
78 fStack.emplace(RegexNode::kConcat_Kind, std::move(left), std::move(right));
79 break;
80 }
81 }
82 }
83
escapeSequence(char c)84 RegexNode RegexParser::escapeSequence(char c) {
85 switch (c) {
86 case 'n': return RegexNode(RegexNode::kChar_Kind, '\n');
87 case 'r': return RegexNode(RegexNode::kChar_Kind, '\r');
88 case 't': return RegexNode(RegexNode::kChar_Kind, '\t');
89 case 's': return RegexNode(RegexNode::kCharset_Kind, " \t\n\r");
90 default: return RegexNode(RegexNode::kChar_Kind, c);
91 }
92 }
93
literal()94 void RegexParser::literal() {
95 char c = this->peek();
96 if (c == '\\') {
97 ++fIndex;
98 fStack.push(this->escapeSequence(peek()));
99 ++fIndex;
100 }
101 else {
102 fStack.push(RegexNode(RegexNode::kChar_Kind, c));
103 ++fIndex;
104 }
105 }
106
dot()107 void RegexParser::dot() {
108 this->expect('.');
109 fStack.push(RegexNode(RegexNode::kDot_Kind));
110 }
111
group()112 void RegexParser::group() {
113 this->expect('(');
114 this->regex();
115 this->expect(')');
116 }
117
setItem()118 void RegexParser::setItem() {
119 this->literal();
120 if (this->peek() == '-') {
121 ++fIndex;
122 if (peek() == ']') {
123 fStack.push(RegexNode(RegexNode::kChar_Kind, '-'));
124 }
125 else {
126 literal();
127 RegexNode end = this->pop();
128 SkASSERT(end.fKind == RegexNode::kChar_Kind);
129 RegexNode start = this->pop();
130 SkASSERT(start.fKind == RegexNode::kChar_Kind);
131 fStack.push(RegexNode(RegexNode::kRange_Kind, std::move(start), std::move(end)));
132 }
133 }
134 }
135
set()136 void RegexParser::set() {
137 expect('[');
138 size_t depth = fStack.size();
139 RegexNode set(RegexNode::kCharset_Kind);
140 if (this->peek() == '^') {
141 ++fIndex;
142 set.fPayload.fBool = true;
143 }
144 else {
145 set.fPayload.fBool = false;
146 }
147 for (;;) {
148 switch (this->peek()) {
149 case ']':
150 ++fIndex;
151 while (fStack.size() > depth) {
152 set.fChildren.push_back(this->pop());
153 }
154 fStack.push(std::move(set));
155 return;
156 case END:
157 printf("unterminated character set\n");
158 exit(1);
159 default:
160 this->setItem();
161 break;
162 }
163 }
164 }
165
regex()166 void RegexParser::regex() {
167 this->sequence();
168 switch (this->peek()) {
169 case '|': {
170 ++fIndex;
171 this->regex();
172 RegexNode right = this->pop();
173 RegexNode left = this->pop();
174 fStack.push(RegexNode(RegexNode::kOr_Kind, left, right));
175 break;
176 }
177 case END: // fall through
178 case ')':
179 return;
180 default:
181 SkASSERT(false);
182 }
183 }
184