/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */

#include "src/sksl/lex/DFA.h"
#include "src/sksl/lex/LexUtil.h"
#include "src/sksl/lex/NFA.h"
#include "src/sksl/lex/NFAtoDFA.h"
#include "src/sksl/lex/RegexNode.h"
#include "src/sksl/lex/RegexParser.h"
#include "src/sksl/lex/TransitionTable.h"

#include <stdio.h>
#include <stdlib.h>
#include <algorithm>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>

/**
 * Processes a .lex file and produces .h and .cpp files which implement a lexical analyzer. The .lex
 * file is a text file with one token definition per line. Each line is of the form:
 * <TOKEN_NAME> = <pattern>
 * where <pattern> is either a regular expression (e.g. [0-9]) or a double-quoted literal string.
 */
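// For illustration, a .lex file might contain definitions like the following (hypothetical token
// names, not taken from any actual .lex file; note that patterns cannot contain whitespace, since
// each line is split on whitespace):
//     WHILE  = "while"
//     ID     = [a-zA-Z_][a-zA-Z_0-9]*
//     DIGITS = [0-9]+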

static constexpr const char HEADER[] =
    "/*\n"
    " * Copyright 2017 Google Inc.\n"
    " *\n"
    " * Use of this source code is governed by a BSD-style license that can be\n"
    " * found in the LICENSE file.\n"
    " */\n"
    "/*****************************************************************************************\n"
    " ******************** This file was generated by sksllex. Do not edit. *******************\n"
    " *****************************************************************************************/\n";

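// Emits the generated header: a <token> struct whose Kind enum has a TK_-prefixed entry for each
// token name (plus TK_NONE), and a <lexer> class with start(), next(), and checkpoint/rewind
// support.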
static void writeH(const DFA& dfa, const char* lexer, const char* token,
                   const std::vector<std::string>& tokens, const char* hPath) {
    std::ofstream out(hPath);
    SkASSERT(out.good());
    out << HEADER;
    out << "#ifndef SKSL_" << lexer << "\n";
    out << "#define SKSL_" << lexer << "\n";
    out << "#include <cstdint>\n";
    out << "#include <string_view>\n";
    out << "namespace SkSL {\n";
    out << "\n";
    out << "struct " << token << " {\n";
    out << "    enum class Kind {\n";
    for (const std::string& t : tokens) {
        out << "        TK_" << t << ",\n";
    }
    out << "        TK_NONE,";
    out << R"(
    };

    )" << token << "() {}";

    out << token << R"((Kind kind, int32_t offset, int32_t length)
    : fKind(kind)
    , fOffset(offset)
    , fLength(length) {}

    Kind fKind      = Kind::TK_NONE;
    int32_t fOffset = -1;
    int32_t fLength = -1;
};

class )" << lexer << R"( {
public:
    void start(std::string_view text) {
        fText = text;
        fOffset = 0;
    }

    )" << token << R"( next();

    struct Checkpoint {
        int32_t fOffset;
    };

    Checkpoint getCheckpoint() const {
        return {fOffset};
    }

    void rewindToCheckpoint(Checkpoint checkpoint) {
        fOffset = checkpoint.fOffset;
    }

private:
    std::string_view fText;
    int32_t fOffset;
};

} // namespace
#endif
)";
}

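// Emits the generated implementation: the character-class mapping table (kMappings), the DFA
// transition table, the accept table (kAccepts, 255 for non-accepting states), and the
// <lexer>::next() method that walks the DFA over fText.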
static void writeCPP(const DFA& dfa, const char* lexer, const char* token, const char* include,
                     const char* cppPath) {
    std::ofstream out(cppPath);
    SkASSERT(out.good());
    out << HEADER;
    out << "#include \"" << include << "\"\n";
    out << "\n";
    out << "namespace SkSL {\n";
    out << "\n";

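    // Emit a State type just wide enough to index every DFA state (uint8_t when there are at most
    // 256 states).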
    size_t states = 0;
    for (const auto& row : dfa.fTransitions) {
        states = std::max(states, row.size());
    }
    out << "using State = " << (states <= 256 ? "uint8_t" : "uint16_t") << ";\n";

    // Find the first character mapped in our DFA.
    size_t startChar = 0;
    for (; startChar < dfa.fCharMappings.size(); ++startChar) {
        if (dfa.fCharMappings[startChar] != 0) {
            break;
        }
    }

    // Arbitrarily-chosen character which is greater than startChar, and should not appear in actual
    // input.
    SkASSERT(startChar < 18);
    out << "static constexpr uint8_t kInvalidChar = 18;";
    out << "static constexpr uint8_t kMappings[" << dfa.fCharMappings.size() - startChar << "] = {";
    for (size_t index = startChar; index < dfa.fCharMappings.size(); ++index) {
        out << std::to_string(dfa.fCharMappings[index]) << ", ";
    }
    out << "};\n";

    WriteTransitionTable(out, dfa, states);

    out << "static const uint8_t kAccepts[" << states << "] = {";
    for (size_t i = 0; i < states; ++i) {
        if (i < dfa.fAccepts.size() && dfa.fAccepts[i] != INVALID) {
            out << " " << dfa.fAccepts[i] << ",";
        } else {
            out << " 255,";
        }
    }
    out << "};\n";
    out << "\n";

    out << token << " " << lexer << "::next() {";
    out << R"(
    // Note that we cheat here: normally a lexer needs to worry about the case
    // where a token has a prefix which is not itself a valid token - for instance,
    // maybe we have a valid token 'while', but 'w', 'wh', etc. are not valid
    // tokens. Our grammar doesn't have this property, so we can simplify the logic
    // a bit.
    int32_t startOffset = fOffset;
    State   state = 1;
    for (;;) {
        if (fOffset >= (int32_t)fText.length()) {
            if (startOffset == (int32_t)fText.length() || kAccepts[state] == 255) {
                return )" << token << "(" << token << R"(::Kind::TK_END_OF_FILE, startOffset, 0);
            }
            break;
        }
        uint8_t c = (uint8_t)(fText[fOffset] - )" << startChar << R"();
        if (c >= )" << dfa.fCharMappings.size() - startChar << R"() {
            c = kInvalidChar;
        }
        State newState = get_transition(kMappings[c], state);
        if (!newState) {
            break;
        }
        state = newState;
        ++fOffset;
    }
    Token::Kind kind = ()" << token << R"(::Kind) kAccepts[state];
    return )" << token << R"((kind, startOffset, fOffset - startOffset);
}

} // namespace
)";
}

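// Reads the .lex file, adding one regex to the NFA per token definition (double-quoted literals are
// expanded into concatenations of single-character nodes), then converts the NFA to a DFA and
// writes the .h and .cpp outputs.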
static void process(const char* inPath, const char* lexer, const char* token, const char* hPath,
                    const char* cppPath) {
    NFA nfa;
    std::vector<std::string> tokens;
    tokens.push_back("END_OF_FILE");
    std::string line;
    std::ifstream in(inPath);
    while (std::getline(in, line)) {
        if (line.length() == 0) {
            continue;
        }
        if (line.length() >= 2 && line[0] == '/' && line[1] == '/') {
            continue;
        }
        std::istringstream split(line);
        std::string name, delimiter, pattern;
        if (split >> name >> delimiter >> pattern) {
            SkASSERT(split.eof());
            SkASSERT(name != "");
            SkASSERT(delimiter == "=");
            SkASSERT(pattern != "");
            tokens.push_back(name);
            if (pattern[0] == '"') {
                SkASSERT(pattern.size() > 2 && pattern[pattern.size() - 1] == '"');
                RegexNode node = RegexNode(RegexNode::kChar_Kind, pattern[1]);
                for (size_t i = 2; i < pattern.size() - 1; ++i) {
                    node = RegexNode(RegexNode::kConcat_Kind, node,
                                     RegexNode(RegexNode::kChar_Kind, pattern[i]));
                }
                nfa.addRegex(node);
            }
            else {
                nfa.addRegex(RegexParser().parse(pattern));
            }
        }
    }
    NFAtoDFA converter(&nfa);
    DFA dfa = converter.convert();
    writeH(dfa, lexer, token, tokens, hPath);
    writeCPP(dfa, lexer, token, (std::string("src/sksl/SkSL") + lexer + ".h").c_str(), cppPath);
}

int main(int argc, const char** argv) {
    if (argc != 6) {
        printf("usage: sksllex <input.lex> <lexername> <tokenname> <output.h> <output.cpp>\n");
        exit(1);
    }
    process(argv[1], argv[2], argv[3], argv[4], argv[5]);
    return 0;
}