1 /*
2 * Copyright (C) 2023 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/trace_processor/perfetto_sql/preprocessor/perfetto_sql_preprocessor.h"
18
19 #include <algorithm>
20 #include <cstddef>
21 #include <cstdint>
22 #include <cstdlib>
23 #include <list>
24 #include <memory>
25 #include <optional>
26 #include <string>
27 #include <string_view>
28 #include <unordered_set>
29 #include <utility>
30 #include <variant>
31 #include <vector>
32
33 #include "perfetto/base/compiler.h"
34 #include "perfetto/base/logging.h"
35 #include "perfetto/base/status.h"
36 #include "perfetto/ext/base/flat_hash_map.h"
37 #include "perfetto/ext/base/string_utils.h"
38 #include "src/trace_processor/perfetto_sql/preprocessor/preprocessor_grammar_interface.h"
39 #include "src/trace_processor/perfetto_sql/tokenizer/sqlite_tokenizer.h"
40 #include "src/trace_processor/sqlite/sql_source.h"
41
42 namespace perfetto::trace_processor {
43 namespace {
44
45 using State = PreprocessorGrammarState;
46
47 struct Preprocessor {
48 public:
Preprocessorperfetto::trace_processor::__anon99d96b220111::Preprocessor49 explicit Preprocessor(State* state)
50 : parser_(PreprocessorGrammarParseAlloc(malloc, state)) {}
~Preprocessorperfetto::trace_processor::__anon99d96b220111::Preprocessor51 ~Preprocessor() { PreprocessorGrammarParseFree(parser_, free); }
52
Parseperfetto::trace_processor::__anon99d96b220111::Preprocessor53 void Parse(int token_type, PreprocessorGrammarToken token) {
54 PreprocessorGrammarParse(parser_, token_type, token);
55 }
56
57 private:
58 void* parser_;
59 };
60
// Macro implementation tag for the stringify intrinsics.
struct Stringify {
  // True for the `__intrinsic_stringify_ignore_table` variant, which still
  // stringifies when the only unresolved variable in the argument is `table`.
  // Defaults to false so a default-initialised instance is well defined.
  bool ignore_table = false;
};
// Macro implementation tag for the token-apply intrinsics.
struct Apply {
  // Grammar token id fed to the parser to select the joiner between the
  // generated invocations (PPTK_COMMA or PPTK_AND in this file).
  int join_token = 0;
  // Grammar token id fed to the parser to select whether the output is
  // prefixed with the joiner (PPTK_TRUE or PPTK_FALSE in this file).
  int prefix_token = 0;
};
68 using MacroImpl =
69 std::variant<PerfettoSqlPreprocessor::Macro*, Stringify, Apply>;
70
71 // Synthetic "stackframe" representing the processing of a single piece of SQL.
72 struct Frame {
73 struct Root {};
74 struct Rewrite {
75 SqliteTokenizer& tokenizer;
76 SqlSource::Rewriter& rewriter;
77 SqliteTokenizer::Token start;
78 SqliteTokenizer::Token end;
79 };
80 struct Append {
81 std::vector<SqlSource>& result;
82 };
83 using Type = std::variant<Root, Rewrite, Append>;
84 struct ActiveMacro {
85 std::string name;
86 MacroImpl impl;
87 std::vector<SqlSource> args;
88 uint32_t nested_macro_count;
89 std::unordered_set<std::string> seen_variables;
90 std::unordered_set<std::string> expanded_variables;
91 };
92 enum VariableHandling { kLookup, kLookupOrIgnore, kIgnore };
93
Frameperfetto::trace_processor::__anon99d96b220111::Frame94 explicit Frame(Type _type,
95 VariableHandling _var_handling,
96 State* s,
97 const SqlSource& source)
98 : type(_type),
99 var_handling(_var_handling),
100 preprocessor(s),
101 tokenizer(source),
102 rewriter(source),
103 substituitions(&owned_substituitions) {}
104 Frame(const Frame&) = delete;
105 Frame& operator=(const Frame&) = delete;
106 Frame(Frame&&) = delete;
107 Frame& operator=(Frame&&) = delete;
108
109 Type type;
110 VariableHandling var_handling;
111 Preprocessor preprocessor;
112 SqliteTokenizer tokenizer;
113
114 bool seen_semicolon = false;
115 SqlSource::Rewriter rewriter;
116 bool ignore_rewrite = false;
117
118 std::optional<ActiveMacro> active_macro;
119
120 base::FlatHashMap<std::string, SqlSource> owned_substituitions;
121 base::FlatHashMap<std::string, SqlSource>* substituitions;
122 };
123
124 struct ErrorToken {
125 SqliteTokenizer::Token token;
126 std::string message;
127 };
128
129 extern "C" struct PreprocessorGrammarState {
130 std::list<Frame> stack;
131 const base::FlatHashMap<std::string, PerfettoSqlPreprocessor::Macro>& macros;
132 std::optional<ErrorToken> error;
133 };
134
135 extern "C" struct PreprocessorGrammarApplyList {
136 std::vector<PreprocessorGrammarTokenBounds> args;
137 };
138
GrammarTokenToTokenizerToken(const PreprocessorGrammarToken & token)139 SqliteTokenizer::Token GrammarTokenToTokenizerToken(
140 const PreprocessorGrammarToken& token) {
141 return SqliteTokenizer::Token{std::string_view(token.ptr, token.n),
142 TK_ILLEGAL};
143 }
144
ErrorAtToken(const SqliteTokenizer & tokenizer,const SqliteTokenizer::Token & token,const char * error)145 base::Status ErrorAtToken(const SqliteTokenizer& tokenizer,
146 const SqliteTokenizer::Token& token,
147 const char* error) {
148 std::string traceback = tokenizer.AsTraceback(token);
149 return base::ErrStatus("%s%s", traceback.c_str(), error);
150 }
151
SqlSourceVectorToString(const std::vector<SqlSource> & vec)152 std::vector<std::string> SqlSourceVectorToString(
153 const std::vector<SqlSource>& vec) {
154 std::vector<std::string> pieces;
155 pieces.reserve(vec.size());
156 for (const auto& list : vec) {
157 pieces.emplace_back(list.sql());
158 }
159 return pieces;
160 }
161
BoundsToStringView(const PreprocessorGrammarTokenBounds & b)162 std::string_view BoundsToStringView(const PreprocessorGrammarTokenBounds& b) {
163 return {b.start.ptr, static_cast<size_t>(b.end.ptr + b.end.n - b.start.ptr)};
164 }
165
RewriteIntrinsicMacro(Frame & frame,SqliteTokenizer::Token name,SqliteTokenizer::Token rp)166 void RewriteIntrinsicMacro(Frame& frame,
167 SqliteTokenizer::Token name,
168 SqliteTokenizer::Token rp) {
169 const auto& macro = *frame.active_macro;
170 frame.tokenizer.Rewrite(
171 frame.rewriter, name, rp,
172 SqlSource::FromTraceProcessorImplementation(
173 macro.name + "!(" +
174 base::Join(SqlSourceVectorToString(macro.args), ", ") + ")"),
175 SqliteTokenizer::EndToken::kInclusive);
176 }
177
ExecuteSqlMacro(State * state,Frame & frame,Frame::ActiveMacro & macro,SqliteTokenizer::Token name,SqliteTokenizer::Token rp)178 void ExecuteSqlMacro(State* state,
179 Frame& frame,
180 Frame::ActiveMacro& macro,
181 SqliteTokenizer::Token name,
182 SqliteTokenizer::Token rp) {
183 auto& sql_macro = std::get<PerfettoSqlPreprocessor::Macro*>(macro.impl);
184 if (macro.args.size() != sql_macro->args.size()) {
185 state->error = ErrorToken{
186 name,
187 base::ErrStatus(
188 "wrong number of macro arguments, expected %zu actual %zu",
189 sql_macro->args.size(), macro.args.size())
190 .message(),
191 };
192 return;
193 }
194 // TODO(lalitm): switch back to kLookup once we have proper parser support.
195 state->stack.emplace_back(
196 Frame::Rewrite{frame.tokenizer, frame.rewriter, name, rp},
197 Frame::kLookupOrIgnore, state, sql_macro->sql);
198 auto& macro_frame = state->stack.back();
199 for (uint32_t i = 0; i < sql_macro->args.size(); ++i) {
200 macro_frame.owned_substituitions.Insert(sql_macro->args[i],
201 std::move(macro.args[i]));
202 }
203 }
204
ExecuteStringify(State * state,Frame & frame,Frame::ActiveMacro & macro,SqliteTokenizer::Token name,SqliteTokenizer::Token rp)205 void ExecuteStringify(State* state,
206 Frame& frame,
207 Frame::ActiveMacro& macro,
208 SqliteTokenizer::Token name,
209 SqliteTokenizer::Token rp) {
210 auto& stringify = std::get<Stringify>(macro.impl);
211 if (macro.args.size() != 1) {
212 state->error = ErrorToken{
213 name,
214 base::ErrStatus(
215 "stringify: must specify exactly 1 argument, actual %zu",
216 macro.args.size())
217 .message(),
218 };
219 return;
220 }
221 bool can_stringify_outer =
222 macro.seen_variables.empty() ||
223 (stringify.ignore_table && macro.seen_variables.size() == 1 &&
224 macro.seen_variables.count("table"));
225 if (!can_stringify_outer) {
226 RewriteIntrinsicMacro(frame, name, rp);
227 return;
228 }
229 if (!macro.expanded_variables.empty()) {
230 state->stack.emplace_back(
231 Frame::Rewrite{frame.tokenizer, frame.rewriter, name, rp},
232 Frame::kIgnore, state,
233 SqlSource::FromTraceProcessorImplementation(macro.name + "!(" +
234 macro.args[0].sql() + ")"));
235 auto& expand_frame = state->stack.back();
236 expand_frame.substituitions = frame.substituitions;
237 return;
238 }
239 auto res = SqlSource::FromTraceProcessorImplementation(
240 "'" + macro.args[0].sql() + "'");
241 frame.tokenizer.Rewrite(frame.rewriter, name, rp, std::move(res),
242 SqliteTokenizer::EndToken::kInclusive);
243 }
244
ExecuteApply(State * state,Frame & frame,Frame::ActiveMacro & macro,SqliteTokenizer::Token name,SqliteTokenizer::Token rp)245 void ExecuteApply(State* state,
246 Frame& frame,
247 Frame::ActiveMacro& macro,
248 SqliteTokenizer::Token name,
249 SqliteTokenizer::Token rp) {
250 auto& apply = std::get<Apply>(macro.impl);
251 if (!macro.seen_variables.empty()) {
252 RewriteIntrinsicMacro(frame, name, rp);
253 return;
254 }
255 state->stack.emplace_back(
256 Frame::Rewrite{frame.tokenizer, frame.rewriter, name, rp},
257 Frame::VariableHandling::kIgnore, state,
258 SqlSource::FromTraceProcessorImplementation(
259 base::Join(SqlSourceVectorToString(macro.args), " ")));
260
261 auto& expansion_frame = state->stack.back();
262 expansion_frame.preprocessor.Parse(
263 PPTK_APPLY, PreprocessorGrammarToken{nullptr, 0, PPTK_APPLY});
264 expansion_frame.preprocessor.Parse(
265 apply.join_token, PreprocessorGrammarToken{nullptr, 0, apply.join_token});
266 expansion_frame.preprocessor.Parse(
267 apply.prefix_token,
268 PreprocessorGrammarToken{nullptr, 0, apply.prefix_token});
269 expansion_frame.ignore_rewrite = true;
270 }
271
OnPreprocessorSyntaxError(State * state,PreprocessorGrammarToken * token)272 extern "C" void OnPreprocessorSyntaxError(State* state,
273 PreprocessorGrammarToken* token) {
274 state->error = {GrammarTokenToTokenizerToken(*token),
275 "preprocessor syntax error"};
276 }
277
OnPreprocessorApply(PreprocessorGrammarState * state,PreprocessorGrammarToken * name,PreprocessorGrammarToken * join,PreprocessorGrammarToken * prefix,PreprocessorGrammarApplyList * raw_a,PreprocessorGrammarApplyList * raw_b)278 extern "C" void OnPreprocessorApply(PreprocessorGrammarState* state,
279 PreprocessorGrammarToken* name,
280 PreprocessorGrammarToken* join,
281 PreprocessorGrammarToken* prefix,
282 PreprocessorGrammarApplyList* raw_a,
283 PreprocessorGrammarApplyList* raw_b) {
284 std::unique_ptr<PreprocessorGrammarApplyList> a(raw_a);
285 std::unique_ptr<PreprocessorGrammarApplyList> b(raw_b);
286 auto& frame = state->stack.back();
287 size_t size = std::min(a->args.size(), b ? b->args.size() : a->args.size());
288 if (size == 0) {
289 auto& rewrite = std::get<Frame::Rewrite>(frame.type);
290 rewrite.tokenizer.Rewrite(rewrite.rewriter, rewrite.start, rewrite.end,
291 SqlSource::FromTraceProcessorImplementation(""),
292 SqliteTokenizer::EndToken::kInclusive);
293 return;
294 }
295 std::string macro(name->ptr, name->n);
296 std::vector<std::string> args;
297 for (uint32_t i = 0; i < size; ++i) {
298 std::string arg = macro;
299 arg.append("!(").append(BoundsToStringView(a->args[i]));
300 if (b) {
301 arg.append(",").append(BoundsToStringView(b->args[i]));
302 }
303 arg.append(")");
304 args.emplace_back(std::move(arg));
305 }
306 std::string joiner = join->major == PPTK_AND ? " AND " : " , ";
307 std::string res = prefix->major == PPTK_TRUE ? joiner : "";
308 res.append(base::Join(args, joiner));
309 state->stack.emplace_back(
310 frame.type, Frame::VariableHandling::kLookupOrIgnore, state,
311 SqlSource::FromTraceProcessorImplementation(std::move(res)));
312 }
313
OnPreprocessorVariable(State * state,PreprocessorGrammarToken * var)314 extern "C" void OnPreprocessorVariable(State* state,
315 PreprocessorGrammarToken* var) {
316 if (var->n == 0 || var->ptr[0] != '$') {
317 state->error = {GrammarTokenToTokenizerToken(*var),
318 "variable must start with '$'"};
319 return;
320 }
321 auto& frame = state->stack.back();
322 if (frame.active_macro) {
323 std::string name(var->ptr + 1, var->n - 1);
324 if (frame.substituitions->Find(name)) {
325 frame.active_macro->expanded_variables.insert(name);
326 } else {
327 frame.active_macro->seen_variables.insert(name);
328 }
329 return;
330 }
331 switch (frame.var_handling) {
332 case Frame::kLookup:
333 case Frame::kLookupOrIgnore: {
334 auto* it =
335 frame.substituitions->Find(std::string(var->ptr + 1, var->n - 1));
336 if (!it) {
337 if (frame.var_handling == Frame::kLookup) {
338 state->error = {GrammarTokenToTokenizerToken(*var),
339 "variable not defined"};
340 }
341 return;
342 }
343 frame.tokenizer.RewriteToken(frame.rewriter,
344 GrammarTokenToTokenizerToken(*var), *it);
345 break;
346 }
347 case Frame::kIgnore:
348 break;
349 }
350 }
351
OnPreprocessorMacroId(State * state,PreprocessorGrammarToken * name_tok)352 extern "C" void OnPreprocessorMacroId(State* state,
353 PreprocessorGrammarToken* name_tok) {
354 auto& invocation = state->stack.back();
355 if (invocation.active_macro) {
356 invocation.active_macro->nested_macro_count++;
357 return;
358 }
359 std::string name(name_tok->ptr, name_tok->n);
360 MacroImpl impl;
361 if (name == "__intrinsic_stringify") {
362 impl = Stringify();
363 } else if (name == "__intrinsic_stringify_ignore_table") {
364 impl = Stringify{true};
365 } else if (name == "__intrinsic_token_apply") {
366 impl = Apply{PPTK_COMMA, PPTK_FALSE};
367 } else if (name == "__intrinsic_token_apply_prefix") {
368 impl = Apply{PPTK_COMMA, PPTK_TRUE};
369 } else if (name == "__intrinsic_token_apply_and") {
370 impl = Apply{PPTK_AND, PPTK_FALSE};
371 } else if (name == "__intrinsic_token_apply_and_prefix") {
372 impl = Apply{PPTK_AND, PPTK_TRUE};
373 } else {
374 auto* sql_macro = state->macros.Find(name);
375 if (!sql_macro) {
376 state->error = {GrammarTokenToTokenizerToken(*name_tok),
377 "no such macro defined"};
378 return;
379 }
380 impl = sql_macro;
381 }
382 invocation.active_macro =
383 Frame::ActiveMacro{std::move(name), impl, {}, 0, {}, {}};
384 }
385
OnPreprocessorMacroArg(State * state,PreprocessorGrammarTokenBounds * arg)386 extern "C" void OnPreprocessorMacroArg(State* state,
387 PreprocessorGrammarTokenBounds* arg) {
388 auto& frame = state->stack.back();
389 auto& macro = *frame.active_macro;
390 if (macro.nested_macro_count > 0) {
391 return;
392 }
393 auto start_token = GrammarTokenToTokenizerToken(arg->start);
394 auto end_token = GrammarTokenToTokenizerToken(arg->end);
395 state->stack.emplace_back(
396 Frame::Append{macro.args}, frame.var_handling, state,
397 frame.tokenizer.Substr(start_token, end_token,
398 SqliteTokenizer::EndToken::kInclusive));
399
400 auto& arg_frame = state->stack.back();
401 arg_frame.substituitions = frame.substituitions;
402 }
403
OnPreprocessorMacroEnd(State * state,PreprocessorGrammarToken * name,PreprocessorGrammarToken * rp)404 extern "C" void OnPreprocessorMacroEnd(State* state,
405 PreprocessorGrammarToken* name,
406 PreprocessorGrammarToken* rp) {
407 auto& frame = state->stack.back();
408 auto& macro = *frame.active_macro;
409 if (macro.nested_macro_count > 0) {
410 --macro.nested_macro_count;
411 return;
412 }
413 switch (macro.impl.index()) {
414 case base::variant_index<MacroImpl, PerfettoSqlPreprocessor::Macro*>():
415 ExecuteSqlMacro(state, frame, macro, GrammarTokenToTokenizerToken(*name),
416 GrammarTokenToTokenizerToken(*rp));
417 break;
418 case base::variant_index<MacroImpl, Stringify>():
419 ExecuteStringify(state, frame, macro, GrammarTokenToTokenizerToken(*name),
420 GrammarTokenToTokenizerToken(*rp));
421 break;
422 case base::variant_index<MacroImpl, Apply>():
423 ExecuteApply(state, frame, macro, GrammarTokenToTokenizerToken(*name),
424 GrammarTokenToTokenizerToken(*rp));
425 break;
426 default:
427 PERFETTO_FATAL("Unknown variant type");
428 }
429 frame.active_macro = std::nullopt;
430 }
431
OnPreprocessorEnd(State * state)432 extern "C" void OnPreprocessorEnd(State* state) {
433 auto& frame = state->stack.back();
434 PERFETTO_CHECK(!frame.active_macro);
435
436 if (frame.ignore_rewrite) {
437 return;
438 }
439 switch (frame.type.index()) {
440 case base::variant_index<Frame::Type, Frame::Append>(): {
441 auto& append = std::get<Frame::Append>(frame.type);
442 append.result.push_back(std::move(frame.rewriter).Build());
443 break;
444 }
445 case base::variant_index<Frame::Type, Frame::Rewrite>(): {
446 auto& rewrite = std::get<Frame::Rewrite>(frame.type);
447 rewrite.tokenizer.Rewrite(rewrite.rewriter, rewrite.start, rewrite.end,
448 std::move(frame.rewriter).Build(),
449 SqliteTokenizer::EndToken::kInclusive);
450 break;
451 }
452 case base::variant_index<Frame::Type, Frame::Root>():
453 break;
454 default:
455 PERFETTO_FATAL("Unknown frame type");
456 }
457 }
458
459 } // namespace
460
PerfettoSqlPreprocessor(SqlSource source,const base::FlatHashMap<std::string,Macro> & macros)461 PerfettoSqlPreprocessor::PerfettoSqlPreprocessor(
462 SqlSource source,
463 const base::FlatHashMap<std::string, Macro>& macros)
464 : global_tokenizer_(std::move(source)), macros_(¯os) {}
465
NextStatement()466 bool PerfettoSqlPreprocessor::NextStatement() {
467 PERFETTO_CHECK(status_.ok());
468
469 // Skip through any number of semi-colons (representing empty statements).
470 SqliteTokenizer::Token tok = global_tokenizer_.NextNonWhitespace();
471 while (tok.token_type == TK_SEMI) {
472 tok = global_tokenizer_.NextNonWhitespace();
473 }
474
475 // If we still see a terminal token at this point, we must have hit EOF.
476 if (tok.IsTerminal()) {
477 PERFETTO_DCHECK(tok.token_type != TK_SEMI);
478 return false;
479 }
480
481 SqlSource stmt =
482 global_tokenizer_.Substr(tok, global_tokenizer_.NextTerminal(),
483 SqliteTokenizer::EndToken::kInclusive);
484
485 State s{{}, *macros_, {}};
486 s.stack.emplace_back(Frame::Root(), Frame::kIgnore, &s, std::move(stmt));
487 for (;;) {
488 auto* frame = &s.stack.back();
489 auto& tk = frame->tokenizer;
490 SqliteTokenizer::Token t = tk.NextNonWhitespace();
491 int token_type;
492 if (t.str.empty()) {
493 token_type = frame->seen_semicolon ? 0 : PPTK_SEMI;
494 frame->seen_semicolon = true;
495 } else if (t.token_type == TK_SEMI) {
496 token_type = PPTK_SEMI;
497 frame->seen_semicolon = true;
498 } else if (t.token_type == TK_ILLEGAL) {
499 if (t.str.size() == 1 && t.str[0] == '!') {
500 token_type = PPTK_EXCLAIM;
501 } else {
502 status_ = ErrorAtToken(tk, t, "illegal token");
503 return false;
504 }
505 } else if (t.token_type == TK_ID) {
506 token_type = PPTK_ID;
507 } else if (t.token_type == TK_LP) {
508 token_type = PPTK_LP;
509 } else if (t.token_type == TK_RP) {
510 token_type = PPTK_RP;
511 } else if (t.token_type == TK_COMMA) {
512 token_type = PPTK_COMMA;
513 } else if (t.token_type == TK_VARIABLE) {
514 token_type = PPTK_VARIABLE;
515 } else {
516 token_type = PPTK_OPAQUE;
517 }
518 frame->preprocessor.Parse(
519 token_type,
520 PreprocessorGrammarToken{t.str.data(), t.str.size(), token_type});
521 if (s.error) {
522 status_ = ErrorAtToken(tk, s.error->token, s.error->message.c_str());
523 return false;
524 }
525 if (token_type == 0) {
526 if (s.stack.size() == 1) {
527 statement_ = std::move(frame->rewriter).Build();
528 return true;
529 }
530 s.stack.pop_back();
531 frame = &s.stack.back();
532 }
533 }
534 }
535
OnPreprocessorCreateApplyList()536 extern "C" PreprocessorGrammarApplyList* OnPreprocessorCreateApplyList() {
537 return std::make_unique<PreprocessorGrammarApplyList>().release();
538 }
539
OnPreprocessorAppendApplyList(PreprocessorGrammarApplyList * list,PreprocessorGrammarTokenBounds * bounds)540 extern "C" PreprocessorGrammarApplyList* OnPreprocessorAppendApplyList(
541 PreprocessorGrammarApplyList* list,
542 PreprocessorGrammarTokenBounds* bounds) {
543 list->args.push_back(*bounds);
544 return list;
545 }
546
OnPreprocessorFreeApplyList(PreprocessorGrammarState *,PreprocessorGrammarApplyList * list)547 extern "C" void OnPreprocessorFreeApplyList(
548 PreprocessorGrammarState*,
549 PreprocessorGrammarApplyList* list) {
550 delete list;
551 }
552
553 } // namespace perfetto::trace_processor
554