1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/perfetto_sql/preprocessor/perfetto_sql_preprocessor.h"
18 
19 #include <algorithm>
20 #include <cstddef>
21 #include <cstdint>
22 #include <cstdlib>
23 #include <list>
24 #include <memory>
25 #include <optional>
26 #include <string>
27 #include <string_view>
28 #include <unordered_set>
29 #include <utility>
30 #include <variant>
31 #include <vector>
32 
33 #include "perfetto/base/compiler.h"
34 #include "perfetto/base/logging.h"
35 #include "perfetto/base/status.h"
36 #include "perfetto/ext/base/flat_hash_map.h"
37 #include "perfetto/ext/base/string_utils.h"
38 #include "src/trace_processor/perfetto_sql/preprocessor/preprocessor_grammar_interface.h"
39 #include "src/trace_processor/perfetto_sql/tokenizer/sqlite_tokenizer.h"
40 #include "src/trace_processor/sqlite/sql_source.h"
41 
42 namespace perfetto::trace_processor {
43 namespace {
44 
45 using State = PreprocessorGrammarState;
46 
47 struct Preprocessor {
48  public:
Preprocessorperfetto::trace_processor::__anon99d96b220111::Preprocessor49   explicit Preprocessor(State* state)
50       : parser_(PreprocessorGrammarParseAlloc(malloc, state)) {}
~Preprocessorperfetto::trace_processor::__anon99d96b220111::Preprocessor51   ~Preprocessor() { PreprocessorGrammarParseFree(parser_, free); }
52 
Parseperfetto::trace_processor::__anon99d96b220111::Preprocessor53   void Parse(int token_type, PreprocessorGrammarToken token) {
54     PreprocessorGrammarParse(parser_, token_type, token);
55   }
56 
57  private:
58   void* parser_;
59 };
60 
61 struct Stringify {
62   bool ignore_table;
63 };
64 struct Apply {
65   int join_token;
66   int prefix_token;
67 };
68 using MacroImpl =
69     std::variant<PerfettoSqlPreprocessor::Macro*, Stringify, Apply>;
70 
71 // Synthetic "stackframe" representing the processing of a single piece of SQL.
72 struct Frame {
73   struct Root {};
74   struct Rewrite {
75     SqliteTokenizer& tokenizer;
76     SqlSource::Rewriter& rewriter;
77     SqliteTokenizer::Token start;
78     SqliteTokenizer::Token end;
79   };
80   struct Append {
81     std::vector<SqlSource>& result;
82   };
83   using Type = std::variant<Root, Rewrite, Append>;
84   struct ActiveMacro {
85     std::string name;
86     MacroImpl impl;
87     std::vector<SqlSource> args;
88     uint32_t nested_macro_count;
89     std::unordered_set<std::string> seen_variables;
90     std::unordered_set<std::string> expanded_variables;
91   };
92   enum VariableHandling { kLookup, kLookupOrIgnore, kIgnore };
93 
Frameperfetto::trace_processor::__anon99d96b220111::Frame94   explicit Frame(Type _type,
95                  VariableHandling _var_handling,
96                  State* s,
97                  const SqlSource& source)
98       : type(_type),
99         var_handling(_var_handling),
100         preprocessor(s),
101         tokenizer(source),
102         rewriter(source),
103         substituitions(&owned_substituitions) {}
104   Frame(const Frame&) = delete;
105   Frame& operator=(const Frame&) = delete;
106   Frame(Frame&&) = delete;
107   Frame& operator=(Frame&&) = delete;
108 
109   Type type;
110   VariableHandling var_handling;
111   Preprocessor preprocessor;
112   SqliteTokenizer tokenizer;
113 
114   bool seen_semicolon = false;
115   SqlSource::Rewriter rewriter;
116   bool ignore_rewrite = false;
117 
118   std::optional<ActiveMacro> active_macro;
119 
120   base::FlatHashMap<std::string, SqlSource> owned_substituitions;
121   base::FlatHashMap<std::string, SqlSource>* substituitions;
122 };
123 
124 struct ErrorToken {
125   SqliteTokenizer::Token token;
126   std::string message;
127 };
128 
129 extern "C" struct PreprocessorGrammarState {
130   std::list<Frame> stack;
131   const base::FlatHashMap<std::string, PerfettoSqlPreprocessor::Macro>& macros;
132   std::optional<ErrorToken> error;
133 };
134 
135 extern "C" struct PreprocessorGrammarApplyList {
136   std::vector<PreprocessorGrammarTokenBounds> args;
137 };
138 
GrammarTokenToTokenizerToken(const PreprocessorGrammarToken & token)139 SqliteTokenizer::Token GrammarTokenToTokenizerToken(
140     const PreprocessorGrammarToken& token) {
141   return SqliteTokenizer::Token{std::string_view(token.ptr, token.n),
142                                 TK_ILLEGAL};
143 }
144 
ErrorAtToken(const SqliteTokenizer & tokenizer,const SqliteTokenizer::Token & token,const char * error)145 base::Status ErrorAtToken(const SqliteTokenizer& tokenizer,
146                           const SqliteTokenizer::Token& token,
147                           const char* error) {
148   std::string traceback = tokenizer.AsTraceback(token);
149   return base::ErrStatus("%s%s", traceback.c_str(), error);
150 }
151 
SqlSourceVectorToString(const std::vector<SqlSource> & vec)152 std::vector<std::string> SqlSourceVectorToString(
153     const std::vector<SqlSource>& vec) {
154   std::vector<std::string> pieces;
155   pieces.reserve(vec.size());
156   for (const auto& list : vec) {
157     pieces.emplace_back(list.sql());
158   }
159   return pieces;
160 }
161 
BoundsToStringView(const PreprocessorGrammarTokenBounds & b)162 std::string_view BoundsToStringView(const PreprocessorGrammarTokenBounds& b) {
163   return {b.start.ptr, static_cast<size_t>(b.end.ptr + b.end.n - b.start.ptr)};
164 }
165 
RewriteIntrinsicMacro(Frame & frame,SqliteTokenizer::Token name,SqliteTokenizer::Token rp)166 void RewriteIntrinsicMacro(Frame& frame,
167                            SqliteTokenizer::Token name,
168                            SqliteTokenizer::Token rp) {
169   const auto& macro = *frame.active_macro;
170   frame.tokenizer.Rewrite(
171       frame.rewriter, name, rp,
172       SqlSource::FromTraceProcessorImplementation(
173           macro.name + "!(" +
174           base::Join(SqlSourceVectorToString(macro.args), ", ") + ")"),
175       SqliteTokenizer::EndToken::kInclusive);
176 }
177 
ExecuteSqlMacro(State * state,Frame & frame,Frame::ActiveMacro & macro,SqliteTokenizer::Token name,SqliteTokenizer::Token rp)178 void ExecuteSqlMacro(State* state,
179                      Frame& frame,
180                      Frame::ActiveMacro& macro,
181                      SqliteTokenizer::Token name,
182                      SqliteTokenizer::Token rp) {
183   auto& sql_macro = std::get<PerfettoSqlPreprocessor::Macro*>(macro.impl);
184   if (macro.args.size() != sql_macro->args.size()) {
185     state->error = ErrorToken{
186         name,
187         base::ErrStatus(
188             "wrong number of macro arguments, expected %zu actual %zu",
189             sql_macro->args.size(), macro.args.size())
190             .message(),
191     };
192     return;
193   }
194   // TODO(lalitm): switch back to kLookup once we have proper parser support.
195   state->stack.emplace_back(
196       Frame::Rewrite{frame.tokenizer, frame.rewriter, name, rp},
197       Frame::kLookupOrIgnore, state, sql_macro->sql);
198   auto& macro_frame = state->stack.back();
199   for (uint32_t i = 0; i < sql_macro->args.size(); ++i) {
200     macro_frame.owned_substituitions.Insert(sql_macro->args[i],
201                                             std::move(macro.args[i]));
202   }
203 }
204 
ExecuteStringify(State * state,Frame & frame,Frame::ActiveMacro & macro,SqliteTokenizer::Token name,SqliteTokenizer::Token rp)205 void ExecuteStringify(State* state,
206                       Frame& frame,
207                       Frame::ActiveMacro& macro,
208                       SqliteTokenizer::Token name,
209                       SqliteTokenizer::Token rp) {
210   auto& stringify = std::get<Stringify>(macro.impl);
211   if (macro.args.size() != 1) {
212     state->error = ErrorToken{
213         name,
214         base::ErrStatus(
215             "stringify: must specify exactly 1 argument, actual %zu",
216             macro.args.size())
217             .message(),
218     };
219     return;
220   }
221   bool can_stringify_outer =
222       macro.seen_variables.empty() ||
223       (stringify.ignore_table && macro.seen_variables.size() == 1 &&
224        macro.seen_variables.count("table"));
225   if (!can_stringify_outer) {
226     RewriteIntrinsicMacro(frame, name, rp);
227     return;
228   }
229   if (!macro.expanded_variables.empty()) {
230     state->stack.emplace_back(
231         Frame::Rewrite{frame.tokenizer, frame.rewriter, name, rp},
232         Frame::kIgnore, state,
233         SqlSource::FromTraceProcessorImplementation(macro.name + "!(" +
234                                                     macro.args[0].sql() + ")"));
235     auto& expand_frame = state->stack.back();
236     expand_frame.substituitions = frame.substituitions;
237     return;
238   }
239   auto res = SqlSource::FromTraceProcessorImplementation(
240       "'" + macro.args[0].sql() + "'");
241   frame.tokenizer.Rewrite(frame.rewriter, name, rp, std::move(res),
242                           SqliteTokenizer::EndToken::kInclusive);
243 }
244 
ExecuteApply(State * state,Frame & frame,Frame::ActiveMacro & macro,SqliteTokenizer::Token name,SqliteTokenizer::Token rp)245 void ExecuteApply(State* state,
246                   Frame& frame,
247                   Frame::ActiveMacro& macro,
248                   SqliteTokenizer::Token name,
249                   SqliteTokenizer::Token rp) {
250   auto& apply = std::get<Apply>(macro.impl);
251   if (!macro.seen_variables.empty()) {
252     RewriteIntrinsicMacro(frame, name, rp);
253     return;
254   }
255   state->stack.emplace_back(
256       Frame::Rewrite{frame.tokenizer, frame.rewriter, name, rp},
257       Frame::VariableHandling::kIgnore, state,
258       SqlSource::FromTraceProcessorImplementation(
259           base::Join(SqlSourceVectorToString(macro.args), " ")));
260 
261   auto& expansion_frame = state->stack.back();
262   expansion_frame.preprocessor.Parse(
263       PPTK_APPLY, PreprocessorGrammarToken{nullptr, 0, PPTK_APPLY});
264   expansion_frame.preprocessor.Parse(
265       apply.join_token, PreprocessorGrammarToken{nullptr, 0, apply.join_token});
266   expansion_frame.preprocessor.Parse(
267       apply.prefix_token,
268       PreprocessorGrammarToken{nullptr, 0, apply.prefix_token});
269   expansion_frame.ignore_rewrite = true;
270 }
271 
OnPreprocessorSyntaxError(State * state,PreprocessorGrammarToken * token)272 extern "C" void OnPreprocessorSyntaxError(State* state,
273                                           PreprocessorGrammarToken* token) {
274   state->error = {GrammarTokenToTokenizerToken(*token),
275                   "preprocessor syntax error"};
276 }
277 
OnPreprocessorApply(PreprocessorGrammarState * state,PreprocessorGrammarToken * name,PreprocessorGrammarToken * join,PreprocessorGrammarToken * prefix,PreprocessorGrammarApplyList * raw_a,PreprocessorGrammarApplyList * raw_b)278 extern "C" void OnPreprocessorApply(PreprocessorGrammarState* state,
279                                     PreprocessorGrammarToken* name,
280                                     PreprocessorGrammarToken* join,
281                                     PreprocessorGrammarToken* prefix,
282                                     PreprocessorGrammarApplyList* raw_a,
283                                     PreprocessorGrammarApplyList* raw_b) {
284   std::unique_ptr<PreprocessorGrammarApplyList> a(raw_a);
285   std::unique_ptr<PreprocessorGrammarApplyList> b(raw_b);
286   auto& frame = state->stack.back();
287   size_t size = std::min(a->args.size(), b ? b->args.size() : a->args.size());
288   if (size == 0) {
289     auto& rewrite = std::get<Frame::Rewrite>(frame.type);
290     rewrite.tokenizer.Rewrite(rewrite.rewriter, rewrite.start, rewrite.end,
291                               SqlSource::FromTraceProcessorImplementation(""),
292                               SqliteTokenizer::EndToken::kInclusive);
293     return;
294   }
295   std::string macro(name->ptr, name->n);
296   std::vector<std::string> args;
297   for (uint32_t i = 0; i < size; ++i) {
298     std::string arg = macro;
299     arg.append("!(").append(BoundsToStringView(a->args[i]));
300     if (b) {
301       arg.append(",").append(BoundsToStringView(b->args[i]));
302     }
303     arg.append(")");
304     args.emplace_back(std::move(arg));
305   }
306   std::string joiner = join->major == PPTK_AND ? " AND " : " , ";
307   std::string res = prefix->major == PPTK_TRUE ? joiner : "";
308   res.append(base::Join(args, joiner));
309   state->stack.emplace_back(
310       frame.type, Frame::VariableHandling::kLookupOrIgnore, state,
311       SqlSource::FromTraceProcessorImplementation(std::move(res)));
312 }
313 
OnPreprocessorVariable(State * state,PreprocessorGrammarToken * var)314 extern "C" void OnPreprocessorVariable(State* state,
315                                        PreprocessorGrammarToken* var) {
316   if (var->n == 0 || var->ptr[0] != '$') {
317     state->error = {GrammarTokenToTokenizerToken(*var),
318                     "variable must start with '$'"};
319     return;
320   }
321   auto& frame = state->stack.back();
322   if (frame.active_macro) {
323     std::string name(var->ptr + 1, var->n - 1);
324     if (frame.substituitions->Find(name)) {
325       frame.active_macro->expanded_variables.insert(name);
326     } else {
327       frame.active_macro->seen_variables.insert(name);
328     }
329     return;
330   }
331   switch (frame.var_handling) {
332     case Frame::kLookup:
333     case Frame::kLookupOrIgnore: {
334       auto* it =
335           frame.substituitions->Find(std::string(var->ptr + 1, var->n - 1));
336       if (!it) {
337         if (frame.var_handling == Frame::kLookup) {
338           state->error = {GrammarTokenToTokenizerToken(*var),
339                           "variable not defined"};
340         }
341         return;
342       }
343       frame.tokenizer.RewriteToken(frame.rewriter,
344                                    GrammarTokenToTokenizerToken(*var), *it);
345       break;
346     }
347     case Frame::kIgnore:
348       break;
349   }
350 }
351 
OnPreprocessorMacroId(State * state,PreprocessorGrammarToken * name_tok)352 extern "C" void OnPreprocessorMacroId(State* state,
353                                       PreprocessorGrammarToken* name_tok) {
354   auto& invocation = state->stack.back();
355   if (invocation.active_macro) {
356     invocation.active_macro->nested_macro_count++;
357     return;
358   }
359   std::string name(name_tok->ptr, name_tok->n);
360   MacroImpl impl;
361   if (name == "__intrinsic_stringify") {
362     impl = Stringify();
363   } else if (name == "__intrinsic_stringify_ignore_table") {
364     impl = Stringify{true};
365   } else if (name == "__intrinsic_token_apply") {
366     impl = Apply{PPTK_COMMA, PPTK_FALSE};
367   } else if (name == "__intrinsic_token_apply_prefix") {
368     impl = Apply{PPTK_COMMA, PPTK_TRUE};
369   } else if (name == "__intrinsic_token_apply_and") {
370     impl = Apply{PPTK_AND, PPTK_FALSE};
371   } else if (name == "__intrinsic_token_apply_and_prefix") {
372     impl = Apply{PPTK_AND, PPTK_TRUE};
373   } else {
374     auto* sql_macro = state->macros.Find(name);
375     if (!sql_macro) {
376       state->error = {GrammarTokenToTokenizerToken(*name_tok),
377                       "no such macro defined"};
378       return;
379     }
380     impl = sql_macro;
381   }
382   invocation.active_macro =
383       Frame::ActiveMacro{std::move(name), impl, {}, 0, {}, {}};
384 }
385 
OnPreprocessorMacroArg(State * state,PreprocessorGrammarTokenBounds * arg)386 extern "C" void OnPreprocessorMacroArg(State* state,
387                                        PreprocessorGrammarTokenBounds* arg) {
388   auto& frame = state->stack.back();
389   auto& macro = *frame.active_macro;
390   if (macro.nested_macro_count > 0) {
391     return;
392   }
393   auto start_token = GrammarTokenToTokenizerToken(arg->start);
394   auto end_token = GrammarTokenToTokenizerToken(arg->end);
395   state->stack.emplace_back(
396       Frame::Append{macro.args}, frame.var_handling, state,
397       frame.tokenizer.Substr(start_token, end_token,
398                              SqliteTokenizer::EndToken::kInclusive));
399 
400   auto& arg_frame = state->stack.back();
401   arg_frame.substituitions = frame.substituitions;
402 }
403 
OnPreprocessorMacroEnd(State * state,PreprocessorGrammarToken * name,PreprocessorGrammarToken * rp)404 extern "C" void OnPreprocessorMacroEnd(State* state,
405                                        PreprocessorGrammarToken* name,
406                                        PreprocessorGrammarToken* rp) {
407   auto& frame = state->stack.back();
408   auto& macro = *frame.active_macro;
409   if (macro.nested_macro_count > 0) {
410     --macro.nested_macro_count;
411     return;
412   }
413   switch (macro.impl.index()) {
414     case base::variant_index<MacroImpl, PerfettoSqlPreprocessor::Macro*>():
415       ExecuteSqlMacro(state, frame, macro, GrammarTokenToTokenizerToken(*name),
416                       GrammarTokenToTokenizerToken(*rp));
417       break;
418     case base::variant_index<MacroImpl, Stringify>():
419       ExecuteStringify(state, frame, macro, GrammarTokenToTokenizerToken(*name),
420                        GrammarTokenToTokenizerToken(*rp));
421       break;
422     case base::variant_index<MacroImpl, Apply>():
423       ExecuteApply(state, frame, macro, GrammarTokenToTokenizerToken(*name),
424                    GrammarTokenToTokenizerToken(*rp));
425       break;
426     default:
427       PERFETTO_FATAL("Unknown variant type");
428   }
429   frame.active_macro = std::nullopt;
430 }
431 
OnPreprocessorEnd(State * state)432 extern "C" void OnPreprocessorEnd(State* state) {
433   auto& frame = state->stack.back();
434   PERFETTO_CHECK(!frame.active_macro);
435 
436   if (frame.ignore_rewrite) {
437     return;
438   }
439   switch (frame.type.index()) {
440     case base::variant_index<Frame::Type, Frame::Append>(): {
441       auto& append = std::get<Frame::Append>(frame.type);
442       append.result.push_back(std::move(frame.rewriter).Build());
443       break;
444     }
445     case base::variant_index<Frame::Type, Frame::Rewrite>(): {
446       auto& rewrite = std::get<Frame::Rewrite>(frame.type);
447       rewrite.tokenizer.Rewrite(rewrite.rewriter, rewrite.start, rewrite.end,
448                                 std::move(frame.rewriter).Build(),
449                                 SqliteTokenizer::EndToken::kInclusive);
450       break;
451     }
452     case base::variant_index<Frame::Type, Frame::Root>():
453       break;
454     default:
455       PERFETTO_FATAL("Unknown frame type");
456   }
457 }
458 
459 }  // namespace
460 
PerfettoSqlPreprocessor(SqlSource source,const base::FlatHashMap<std::string,Macro> & macros)461 PerfettoSqlPreprocessor::PerfettoSqlPreprocessor(
462     SqlSource source,
463     const base::FlatHashMap<std::string, Macro>& macros)
464     : global_tokenizer_(std::move(source)), macros_(&macros) {}
465 
NextStatement()466 bool PerfettoSqlPreprocessor::NextStatement() {
467   PERFETTO_CHECK(status_.ok());
468 
469   // Skip through any number of semi-colons (representing empty statements).
470   SqliteTokenizer::Token tok = global_tokenizer_.NextNonWhitespace();
471   while (tok.token_type == TK_SEMI) {
472     tok = global_tokenizer_.NextNonWhitespace();
473   }
474 
475   // If we still see a terminal token at this point, we must have hit EOF.
476   if (tok.IsTerminal()) {
477     PERFETTO_DCHECK(tok.token_type != TK_SEMI);
478     return false;
479   }
480 
481   SqlSource stmt =
482       global_tokenizer_.Substr(tok, global_tokenizer_.NextTerminal(),
483                                SqliteTokenizer::EndToken::kInclusive);
484 
485   State s{{}, *macros_, {}};
486   s.stack.emplace_back(Frame::Root(), Frame::kIgnore, &s, std::move(stmt));
487   for (;;) {
488     auto* frame = &s.stack.back();
489     auto& tk = frame->tokenizer;
490     SqliteTokenizer::Token t = tk.NextNonWhitespace();
491     int token_type;
492     if (t.str.empty()) {
493       token_type = frame->seen_semicolon ? 0 : PPTK_SEMI;
494       frame->seen_semicolon = true;
495     } else if (t.token_type == TK_SEMI) {
496       token_type = PPTK_SEMI;
497       frame->seen_semicolon = true;
498     } else if (t.token_type == TK_ILLEGAL) {
499       if (t.str.size() == 1 && t.str[0] == '!') {
500         token_type = PPTK_EXCLAIM;
501       } else {
502         status_ = ErrorAtToken(tk, t, "illegal token");
503         return false;
504       }
505     } else if (t.token_type == TK_ID) {
506       token_type = PPTK_ID;
507     } else if (t.token_type == TK_LP) {
508       token_type = PPTK_LP;
509     } else if (t.token_type == TK_RP) {
510       token_type = PPTK_RP;
511     } else if (t.token_type == TK_COMMA) {
512       token_type = PPTK_COMMA;
513     } else if (t.token_type == TK_VARIABLE) {
514       token_type = PPTK_VARIABLE;
515     } else {
516       token_type = PPTK_OPAQUE;
517     }
518     frame->preprocessor.Parse(
519         token_type,
520         PreprocessorGrammarToken{t.str.data(), t.str.size(), token_type});
521     if (s.error) {
522       status_ = ErrorAtToken(tk, s.error->token, s.error->message.c_str());
523       return false;
524     }
525     if (token_type == 0) {
526       if (s.stack.size() == 1) {
527         statement_ = std::move(frame->rewriter).Build();
528         return true;
529       }
530       s.stack.pop_back();
531       frame = &s.stack.back();
532     }
533   }
534 }
535 
OnPreprocessorCreateApplyList()536 extern "C" PreprocessorGrammarApplyList* OnPreprocessorCreateApplyList() {
537   return std::make_unique<PreprocessorGrammarApplyList>().release();
538 }
539 
OnPreprocessorAppendApplyList(PreprocessorGrammarApplyList * list,PreprocessorGrammarTokenBounds * bounds)540 extern "C" PreprocessorGrammarApplyList* OnPreprocessorAppendApplyList(
541     PreprocessorGrammarApplyList* list,
542     PreprocessorGrammarTokenBounds* bounds) {
543   list->args.push_back(*bounds);
544   return list;
545 }
546 
OnPreprocessorFreeApplyList(PreprocessorGrammarState *,PreprocessorGrammarApplyList * list)547 extern "C" void OnPreprocessorFreeApplyList(
548     PreprocessorGrammarState*,
549     PreprocessorGrammarApplyList* list) {
550   delete list;
551 }
552 
553 }  // namespace perfetto::trace_processor
554