xref: /aosp_15_r20/external/skia/src/sksl/lex/Main.cpp (revision c8dee2aa9b3f27cf6c858bd81872bdeb2c07ed17)
1*c8dee2aaSAndroid Build Coastguard Worker /*
2*c8dee2aaSAndroid Build Coastguard Worker  * Copyright 2017 Google Inc.
3*c8dee2aaSAndroid Build Coastguard Worker  *
4*c8dee2aaSAndroid Build Coastguard Worker  * Use of this source code is governed by a BSD-style license that can be
5*c8dee2aaSAndroid Build Coastguard Worker  * found in the LICENSE file.
6*c8dee2aaSAndroid Build Coastguard Worker  */
7*c8dee2aaSAndroid Build Coastguard Worker 
8*c8dee2aaSAndroid Build Coastguard Worker #include "src/sksl/lex/DFA.h"
9*c8dee2aaSAndroid Build Coastguard Worker #include "src/sksl/lex/LexUtil.h"
10*c8dee2aaSAndroid Build Coastguard Worker #include "src/sksl/lex/NFA.h"
11*c8dee2aaSAndroid Build Coastguard Worker #include "src/sksl/lex/NFAtoDFA.h"
12*c8dee2aaSAndroid Build Coastguard Worker #include "src/sksl/lex/RegexNode.h"
13*c8dee2aaSAndroid Build Coastguard Worker #include "src/sksl/lex/RegexParser.h"
14*c8dee2aaSAndroid Build Coastguard Worker #include "src/sksl/lex/TransitionTable.h"
15*c8dee2aaSAndroid Build Coastguard Worker 
16*c8dee2aaSAndroid Build Coastguard Worker #include <stdio.h>
17*c8dee2aaSAndroid Build Coastguard Worker #include <stdlib.h>
18*c8dee2aaSAndroid Build Coastguard Worker #include <algorithm>
19*c8dee2aaSAndroid Build Coastguard Worker #include <sstream>
20*c8dee2aaSAndroid Build Coastguard Worker #include <string>
21*c8dee2aaSAndroid Build Coastguard Worker #include <vector>
22*c8dee2aaSAndroid Build Coastguard Worker 
/**
 * Processes a .lex file and produces .h and .cpp files which implement a lexical analyzer. The .lex
 * file is a text file with one token definition per line. Each line is of the form:
 * <TOKEN_NAME> = <pattern>
 * where <pattern> is either a regular expression (e.g. [0-9]) or a double-quoted literal string.
 * Blank lines and lines beginning with "//" are skipped.
 */
29*c8dee2aaSAndroid Build Coastguard Worker 
// Banner emitted at the top of every generated file: the license notice followed by a
// "generated by sksllex" warning. Kept as a single raw string literal so the text reads exactly
// as it is written out; the first emitted character is the '/' right after the opening delimiter.
static constexpr char HEADER[] = R"(/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
/*****************************************************************************************
 ******************** This file was generated by sksllex. Do not edit. *******************
 *****************************************************************************************/
)";
40*c8dee2aaSAndroid Build Coastguard Worker 
writeH(const DFA & dfa,const char * lexer,const char * token,const std::vector<std::string> & tokens,const char * hPath)41*c8dee2aaSAndroid Build Coastguard Worker static void writeH(const DFA& dfa, const char* lexer, const char* token,
42*c8dee2aaSAndroid Build Coastguard Worker                    const std::vector<std::string>& tokens, const char* hPath) {
43*c8dee2aaSAndroid Build Coastguard Worker     std::ofstream out(hPath);
44*c8dee2aaSAndroid Build Coastguard Worker     SkASSERT(out.good());
45*c8dee2aaSAndroid Build Coastguard Worker     out << HEADER;
46*c8dee2aaSAndroid Build Coastguard Worker     out << "#ifndef SKSL_" << lexer << "\n";
47*c8dee2aaSAndroid Build Coastguard Worker     out << "#define SKSL_" << lexer << "\n";
48*c8dee2aaSAndroid Build Coastguard Worker     out << "#include <cstdint>\n";
49*c8dee2aaSAndroid Build Coastguard Worker     out << "#include <string_view>\n";
50*c8dee2aaSAndroid Build Coastguard Worker     out << "namespace SkSL {\n";
51*c8dee2aaSAndroid Build Coastguard Worker     out << "\n";
52*c8dee2aaSAndroid Build Coastguard Worker     out << "struct " << token << " {\n";
53*c8dee2aaSAndroid Build Coastguard Worker     out << "    enum class Kind {\n";
54*c8dee2aaSAndroid Build Coastguard Worker     for (const std::string& t : tokens) {
55*c8dee2aaSAndroid Build Coastguard Worker         out << "        TK_" << t << ",\n";
56*c8dee2aaSAndroid Build Coastguard Worker     }
57*c8dee2aaSAndroid Build Coastguard Worker     out << "        TK_NONE,";
58*c8dee2aaSAndroid Build Coastguard Worker     out << R"(
59*c8dee2aaSAndroid Build Coastguard Worker     };
60*c8dee2aaSAndroid Build Coastguard Worker 
61*c8dee2aaSAndroid Build Coastguard Worker     )" << token << "() {}";
62*c8dee2aaSAndroid Build Coastguard Worker 
63*c8dee2aaSAndroid Build Coastguard Worker     out << token << R"((Kind kind, int32_t offset, int32_t length)
64*c8dee2aaSAndroid Build Coastguard Worker     : fKind(kind)
65*c8dee2aaSAndroid Build Coastguard Worker     , fOffset(offset)
66*c8dee2aaSAndroid Build Coastguard Worker     , fLength(length) {}
67*c8dee2aaSAndroid Build Coastguard Worker 
68*c8dee2aaSAndroid Build Coastguard Worker     Kind fKind      = Kind::TK_NONE;
69*c8dee2aaSAndroid Build Coastguard Worker     int32_t fOffset = -1;
70*c8dee2aaSAndroid Build Coastguard Worker     int32_t fLength = -1;
71*c8dee2aaSAndroid Build Coastguard Worker };
72*c8dee2aaSAndroid Build Coastguard Worker 
73*c8dee2aaSAndroid Build Coastguard Worker class )" << lexer << R"( {
74*c8dee2aaSAndroid Build Coastguard Worker public:
75*c8dee2aaSAndroid Build Coastguard Worker     void start(std::string_view text) {
76*c8dee2aaSAndroid Build Coastguard Worker         fText = text;
77*c8dee2aaSAndroid Build Coastguard Worker         fOffset = 0;
78*c8dee2aaSAndroid Build Coastguard Worker     }
79*c8dee2aaSAndroid Build Coastguard Worker 
80*c8dee2aaSAndroid Build Coastguard Worker     )" << token << R"( next();
81*c8dee2aaSAndroid Build Coastguard Worker 
82*c8dee2aaSAndroid Build Coastguard Worker     struct Checkpoint {
83*c8dee2aaSAndroid Build Coastguard Worker         int32_t fOffset;
84*c8dee2aaSAndroid Build Coastguard Worker     };
85*c8dee2aaSAndroid Build Coastguard Worker 
86*c8dee2aaSAndroid Build Coastguard Worker     Checkpoint getCheckpoint() const {
87*c8dee2aaSAndroid Build Coastguard Worker         return {fOffset};
88*c8dee2aaSAndroid Build Coastguard Worker     }
89*c8dee2aaSAndroid Build Coastguard Worker 
90*c8dee2aaSAndroid Build Coastguard Worker     void rewindToCheckpoint(Checkpoint checkpoint) {
91*c8dee2aaSAndroid Build Coastguard Worker         fOffset = checkpoint.fOffset;
92*c8dee2aaSAndroid Build Coastguard Worker     }
93*c8dee2aaSAndroid Build Coastguard Worker 
94*c8dee2aaSAndroid Build Coastguard Worker private:
95*c8dee2aaSAndroid Build Coastguard Worker     std::string_view fText;
96*c8dee2aaSAndroid Build Coastguard Worker     int32_t fOffset;
97*c8dee2aaSAndroid Build Coastguard Worker };
98*c8dee2aaSAndroid Build Coastguard Worker 
99*c8dee2aaSAndroid Build Coastguard Worker } // namespace
100*c8dee2aaSAndroid Build Coastguard Worker #endif
101*c8dee2aaSAndroid Build Coastguard Worker )";
102*c8dee2aaSAndroid Build Coastguard Worker }
103*c8dee2aaSAndroid Build Coastguard Worker 
writeCPP(const DFA & dfa,const char * lexer,const char * token,const char * include,const char * cppPath)104*c8dee2aaSAndroid Build Coastguard Worker static void writeCPP(const DFA& dfa, const char* lexer, const char* token, const char* include,
105*c8dee2aaSAndroid Build Coastguard Worker                      const char* cppPath) {
106*c8dee2aaSAndroid Build Coastguard Worker     std::ofstream out(cppPath);
107*c8dee2aaSAndroid Build Coastguard Worker     SkASSERT(out.good());
108*c8dee2aaSAndroid Build Coastguard Worker     out << HEADER;
109*c8dee2aaSAndroid Build Coastguard Worker     out << "#include \"" << include << "\"\n";
110*c8dee2aaSAndroid Build Coastguard Worker     out << "\n";
111*c8dee2aaSAndroid Build Coastguard Worker     out << "namespace SkSL {\n";
112*c8dee2aaSAndroid Build Coastguard Worker     out << "\n";
113*c8dee2aaSAndroid Build Coastguard Worker 
114*c8dee2aaSAndroid Build Coastguard Worker     size_t states = 0;
115*c8dee2aaSAndroid Build Coastguard Worker     for (const auto& row : dfa.fTransitions) {
116*c8dee2aaSAndroid Build Coastguard Worker         states = std::max(states, row.size());
117*c8dee2aaSAndroid Build Coastguard Worker     }
118*c8dee2aaSAndroid Build Coastguard Worker     out << "using State = " << (states <= 256 ? "uint8_t" : "uint16_t") << ";\n";
119*c8dee2aaSAndroid Build Coastguard Worker 
120*c8dee2aaSAndroid Build Coastguard Worker     // Find the first character mapped in our DFA.
121*c8dee2aaSAndroid Build Coastguard Worker     size_t startChar = 0;
122*c8dee2aaSAndroid Build Coastguard Worker     for (; startChar < dfa.fCharMappings.size(); ++startChar) {
123*c8dee2aaSAndroid Build Coastguard Worker         if (dfa.fCharMappings[startChar] != 0) {
124*c8dee2aaSAndroid Build Coastguard Worker             break;
125*c8dee2aaSAndroid Build Coastguard Worker         }
126*c8dee2aaSAndroid Build Coastguard Worker     }
127*c8dee2aaSAndroid Build Coastguard Worker 
128*c8dee2aaSAndroid Build Coastguard Worker     // Arbitrarily-chosen character which is greater than startChar, and should not appear in actual
129*c8dee2aaSAndroid Build Coastguard Worker     // input.
130*c8dee2aaSAndroid Build Coastguard Worker     SkASSERT(startChar < 18);
131*c8dee2aaSAndroid Build Coastguard Worker     out << "static constexpr uint8_t kInvalidChar = 18;";
132*c8dee2aaSAndroid Build Coastguard Worker     out << "static constexpr uint8_t kMappings[" << dfa.fCharMappings.size() - startChar << "] = {";
133*c8dee2aaSAndroid Build Coastguard Worker     for (size_t index = startChar; index < dfa.fCharMappings.size(); ++index) {
134*c8dee2aaSAndroid Build Coastguard Worker         out << std::to_string(dfa.fCharMappings[index]) << ", ";
135*c8dee2aaSAndroid Build Coastguard Worker     }
136*c8dee2aaSAndroid Build Coastguard Worker     out << "};\n";
137*c8dee2aaSAndroid Build Coastguard Worker 
138*c8dee2aaSAndroid Build Coastguard Worker     WriteTransitionTable(out, dfa, states);
139*c8dee2aaSAndroid Build Coastguard Worker 
140*c8dee2aaSAndroid Build Coastguard Worker     out << "static const uint8_t kAccepts[" << states << "] = {";
141*c8dee2aaSAndroid Build Coastguard Worker     for (size_t i = 0; i < states; ++i) {
142*c8dee2aaSAndroid Build Coastguard Worker         if (i < dfa.fAccepts.size() && dfa.fAccepts[i] != INVALID) {
143*c8dee2aaSAndroid Build Coastguard Worker             out << " " << dfa.fAccepts[i] << ",";
144*c8dee2aaSAndroid Build Coastguard Worker         } else {
145*c8dee2aaSAndroid Build Coastguard Worker             out << " 255,";
146*c8dee2aaSAndroid Build Coastguard Worker         }
147*c8dee2aaSAndroid Build Coastguard Worker     }
148*c8dee2aaSAndroid Build Coastguard Worker     out << "};\n";
149*c8dee2aaSAndroid Build Coastguard Worker     out << "\n";
150*c8dee2aaSAndroid Build Coastguard Worker 
151*c8dee2aaSAndroid Build Coastguard Worker     out << token << " " << lexer << "::next() {";
152*c8dee2aaSAndroid Build Coastguard Worker     out << R"(
153*c8dee2aaSAndroid Build Coastguard Worker     // Note that we cheat here: normally a lexer needs to worry about the case
154*c8dee2aaSAndroid Build Coastguard Worker     // where a token has a prefix which is not itself a valid token - for instance,
155*c8dee2aaSAndroid Build Coastguard Worker     // maybe we have a valid token 'while', but 'w', 'wh', etc. are not valid
156*c8dee2aaSAndroid Build Coastguard Worker     // tokens. Our grammar doesn't have this property, so we can simplify the logic
157*c8dee2aaSAndroid Build Coastguard Worker     // a bit.
158*c8dee2aaSAndroid Build Coastguard Worker     int32_t startOffset = fOffset;
159*c8dee2aaSAndroid Build Coastguard Worker     State   state = 1;
160*c8dee2aaSAndroid Build Coastguard Worker     for (;;) {
161*c8dee2aaSAndroid Build Coastguard Worker         if (fOffset >= (int32_t)fText.length()) {
162*c8dee2aaSAndroid Build Coastguard Worker             if (startOffset == (int32_t)fText.length() || kAccepts[state] == 255) {
163*c8dee2aaSAndroid Build Coastguard Worker                 return )" << token << "(" << token << R"(::Kind::TK_END_OF_FILE, startOffset, 0);
164*c8dee2aaSAndroid Build Coastguard Worker             }
165*c8dee2aaSAndroid Build Coastguard Worker             break;
166*c8dee2aaSAndroid Build Coastguard Worker         }
167*c8dee2aaSAndroid Build Coastguard Worker         uint8_t c = (uint8_t)(fText[fOffset] - )" << startChar << R"();
168*c8dee2aaSAndroid Build Coastguard Worker         if (c >= )" << dfa.fCharMappings.size() - startChar << R"() {
169*c8dee2aaSAndroid Build Coastguard Worker             c = kInvalidChar;
170*c8dee2aaSAndroid Build Coastguard Worker         }
171*c8dee2aaSAndroid Build Coastguard Worker         State newState = get_transition(kMappings[c], state);
172*c8dee2aaSAndroid Build Coastguard Worker         if (!newState) {
173*c8dee2aaSAndroid Build Coastguard Worker             break;
174*c8dee2aaSAndroid Build Coastguard Worker         }
175*c8dee2aaSAndroid Build Coastguard Worker         state = newState;
176*c8dee2aaSAndroid Build Coastguard Worker         ++fOffset;
177*c8dee2aaSAndroid Build Coastguard Worker     }
178*c8dee2aaSAndroid Build Coastguard Worker     Token::Kind kind = ()" << token << R"(::Kind) kAccepts[state];
179*c8dee2aaSAndroid Build Coastguard Worker     return )" << token << R"((kind, startOffset, fOffset - startOffset);
180*c8dee2aaSAndroid Build Coastguard Worker }
181*c8dee2aaSAndroid Build Coastguard Worker 
182*c8dee2aaSAndroid Build Coastguard Worker } // namespace
183*c8dee2aaSAndroid Build Coastguard Worker )";
184*c8dee2aaSAndroid Build Coastguard Worker }
185*c8dee2aaSAndroid Build Coastguard Worker 
process(const char * inPath,const char * lexer,const char * token,const char * hPath,const char * cppPath)186*c8dee2aaSAndroid Build Coastguard Worker static void process(const char* inPath, const char* lexer, const char* token, const char* hPath,
187*c8dee2aaSAndroid Build Coastguard Worker                     const char* cppPath) {
188*c8dee2aaSAndroid Build Coastguard Worker     NFA nfa;
189*c8dee2aaSAndroid Build Coastguard Worker     std::vector<std::string> tokens;
190*c8dee2aaSAndroid Build Coastguard Worker     tokens.push_back("END_OF_FILE");
191*c8dee2aaSAndroid Build Coastguard Worker     std::string line;
192*c8dee2aaSAndroid Build Coastguard Worker     std::ifstream in(inPath);
193*c8dee2aaSAndroid Build Coastguard Worker     while (std::getline(in, line)) {
194*c8dee2aaSAndroid Build Coastguard Worker         if (line.length() == 0) {
195*c8dee2aaSAndroid Build Coastguard Worker             continue;
196*c8dee2aaSAndroid Build Coastguard Worker         }
197*c8dee2aaSAndroid Build Coastguard Worker         if (line.length() >= 2 && line[0] == '/' && line[1] == '/') {
198*c8dee2aaSAndroid Build Coastguard Worker             continue;
199*c8dee2aaSAndroid Build Coastguard Worker         }
200*c8dee2aaSAndroid Build Coastguard Worker         std::istringstream split(line);
201*c8dee2aaSAndroid Build Coastguard Worker         std::string name, delimiter, pattern;
202*c8dee2aaSAndroid Build Coastguard Worker         if (split >> name >> delimiter >> pattern) {
203*c8dee2aaSAndroid Build Coastguard Worker             SkASSERT(split.eof());
204*c8dee2aaSAndroid Build Coastguard Worker             SkASSERT(name != "");
205*c8dee2aaSAndroid Build Coastguard Worker             SkASSERT(delimiter == "=");
206*c8dee2aaSAndroid Build Coastguard Worker             SkASSERT(pattern != "");
207*c8dee2aaSAndroid Build Coastguard Worker             tokens.push_back(name);
208*c8dee2aaSAndroid Build Coastguard Worker             if (pattern[0] == '"') {
209*c8dee2aaSAndroid Build Coastguard Worker                 SkASSERT(pattern.size() > 2 && pattern[pattern.size() - 1] == '"');
210*c8dee2aaSAndroid Build Coastguard Worker                 RegexNode node = RegexNode(RegexNode::kChar_Kind, pattern[1]);
211*c8dee2aaSAndroid Build Coastguard Worker                 for (size_t i = 2; i < pattern.size() - 1; ++i) {
212*c8dee2aaSAndroid Build Coastguard Worker                     node = RegexNode(RegexNode::kConcat_Kind, node,
213*c8dee2aaSAndroid Build Coastguard Worker                                      RegexNode(RegexNode::kChar_Kind, pattern[i]));
214*c8dee2aaSAndroid Build Coastguard Worker                 }
215*c8dee2aaSAndroid Build Coastguard Worker                 nfa.addRegex(node);
216*c8dee2aaSAndroid Build Coastguard Worker             }
217*c8dee2aaSAndroid Build Coastguard Worker             else {
218*c8dee2aaSAndroid Build Coastguard Worker                 nfa.addRegex(RegexParser().parse(pattern));
219*c8dee2aaSAndroid Build Coastguard Worker             }
220*c8dee2aaSAndroid Build Coastguard Worker         }
221*c8dee2aaSAndroid Build Coastguard Worker     }
222*c8dee2aaSAndroid Build Coastguard Worker     NFAtoDFA converter(&nfa);
223*c8dee2aaSAndroid Build Coastguard Worker     DFA dfa = converter.convert();
224*c8dee2aaSAndroid Build Coastguard Worker     writeH(dfa, lexer, token, tokens, hPath);
225*c8dee2aaSAndroid Build Coastguard Worker     writeCPP(dfa, lexer, token, (std::string("src/sksl/SkSL") + lexer + ".h").c_str(), cppPath);
226*c8dee2aaSAndroid Build Coastguard Worker }
227*c8dee2aaSAndroid Build Coastguard Worker 
main(int argc,const char ** argv)228*c8dee2aaSAndroid Build Coastguard Worker int main(int argc, const char** argv) {
229*c8dee2aaSAndroid Build Coastguard Worker     if (argc != 6) {
230*c8dee2aaSAndroid Build Coastguard Worker         printf("usage: sksllex <input.lex> <lexername> <tokenname> <output.h> <output.cpp>\n");
231*c8dee2aaSAndroid Build Coastguard Worker         exit(1);
232*c8dee2aaSAndroid Build Coastguard Worker     }
233*c8dee2aaSAndroid Build Coastguard Worker     process(argv[1], argv[2], argv[3], argv[4], argv[5]);
234*c8dee2aaSAndroid Build Coastguard Worker     return 0;
235*c8dee2aaSAndroid Build Coastguard Worker }
236