xref: /aosp_15_r20/external/perfetto/src/trace_processor/sqlite/sql_source.h (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_PROCESSOR_SQLITE_SQL_SOURCE_H_
18 #define SRC_TRACE_PROCESSOR_SQLITE_SQL_SOURCE_H_
19 
20 #include <cstdint>
21 #include <optional>
22 #include <string>
23 #include <string_view>
24 #include <tuple>
25 #include <vector>
26 
27 #include "perfetto/base/logging.h"
28 
29 namespace perfetto {
30 namespace trace_processor {
31 
32 // An SQL string which retains knowledge of the source of the SQL (i.e. stdlib
33 // module, ExecuteQuery etc). It also supports "rewriting" parts or all of the
34 // SQL string with a different string which is useful in cases where SQL is
35 // substituted such as macros or function inlining.
36 class SqlSource {
37  public:
38   class Rewriter;
39 
40   // Creates a SqlSource instance wrapping SQL passed to
41   // |TraceProcessor::ExecuteQuery|.
42   static SqlSource FromExecuteQuery(std::string sql);
43 
44   // Creates a SqlSource instance wrapping SQL executed when running a metric.
45   static SqlSource FromMetric(std::string sql, const std::string& metric_file);
46 
47   // Creates a SqlSource instance wrapping SQL executed when running a metric
48   // file (i.e. with RUN_METRIC).
49   static SqlSource FromMetricFile(std::string sql,
50                                   const std::string& metric_file);
51 
52   // Creates a SqlSource instance wrapping SQL executed when including a module.
53   static SqlSource FromModuleInclude(std::string sql,
54                                      const std::string& module);
55 
56   // Creates a SqlSource instance wrapping SQL which is an internal
57   // implementation detail of trace processor.
58   static SqlSource FromTraceProcessorImplementation(std::string sql);
59 
60   // Returns this SqlSource instance as a string which can be appended as a
61   // "traceback" frame to an error message. Callers should pass an |offset|
62   // parameter which indicates the exact location of the error in the SQL
63   // string. 0 and |sql().size()| are both valid offset positions and correspond
64   // to the start and end of the source respectively.
65   //
66   // Specifically, this string will include:
67   //  a) context about the source of the SQL
68   //  b) line and column number of the error
69   //  c) a snippet of the SQL and a caret (^) character pointing to the location
70   //     of the error.
71   std::string AsTraceback(uint32_t offset) const;
72 
73   // Same as |AsTraceback| but for offsets which come from SQLite instead of
74   // from trace processor tokenization or parsing.
75   std::string AsTracebackForSqliteOffset(std::optional<uint32_t> offset) const;
76 
77   // Creates a SqlSource instance with the SQL taken as a substring starting
78   // at |offset| with |len| characters.
79   SqlSource Substr(uint32_t offset, uint32_t len) const;
80 
81   // Rewrites the SQL backing |this| to SQL from |source| ignoring any existing
82   // rewrites in |this|.
83   //
84   // This is useful when PerfettoSQL statements are transpiled into SQLite
85   // statements but we want to preserve the context of the original statement.
86   SqlSource RewriteAllIgnoreExisting(SqlSource source) const;
87 
88   // Returns the rewritten SQL string backing this SqlSource instance.
sql()89   const std::string& sql() const { return root_.rewritten_sql; }
90 
91   // Returns the original, un-rewritten SQL string backing this instance.
original_sql()92   const std::string& original_sql() const { return root_.original_sql; }
93 
94   // Returns whether this SqlSource has been rewritten.
IsRewritten()95   bool IsRewritten() const { return root_.IsRewritten(); }
96 
97  private:
98   struct Rewrite;
99 
100   // Represents a tree of SQL rewrites, preserving the source for each rewrite.
101   //
102   // Suppose that we have the following situation:
103   // User: `SELECT foo!(a) FROM bar!(slice) a`
104   // foo : `$1.x, $1.y`
105   // bar : `(SELECT baz!($1) FROM $1)`
106   // baz : `$1.x, $1.y, $1.z`
107   //
108   // We want to expand this to
109   // ```SELECT a.x, a.y FROM (SELECT slice.x, slice.y, slice.z FROM slice) a```
110   // while retaining information about the source of the rewrite.
111   //
112   // For example, the string `a.x, a.y` came from foo, `slice.x, slice.y,
113   // slice.z` came from bar, which itself recursively came from baz etc.
114   //
115   // The purpose of this class is to keep track of the information required for
116   // this "tree" of rewrites (i.e. expansions). In the example above, the tree
117   // would look as follows:
118   //                      User
119   //                     /    |
120   //                   foo    bar
121   //                           |
122   //                          baz
123   //
124   // The properties in each of these nodes is as follows:
125   // User {
126   //   original_sql: "SELECT foo!(a) FROM bar!(slice) a"
127   //   rewritten_sql: "SELECT a.x, a.y FROM (SELECT slice.x, slice.y, slice.z
128   //                   FROM slice) a"
129   //   rewrites: [
130   //     {original_sql_start: 7, original_sql_end: 14, node: foo},
131   //     {original_sql_start: 20, original_sql_end: 31, node: bar}
132   //   ]
133   // }
134   // foo {
135   //   original_sql: "$1.x, $1.y"
136   //   rewritten_sql: "a.x, a.y"
137   //   rewrites: []
138   // }
139   // bar {
140   //   original_sql: "(SELECT baz!($1) FROM $1)"
141   //   rewritten_sql: "(SELECT slice.x, slice.y, slice.z FROM slice)"
142   //   rewrites: [{original_sql_start: 8, original_sql_end: 16, node: baz}]
143   // }
144   // baz {
145   //   original_sql: "$1.x, $1.y, $1.z"
146   //   rewritten_sql: "slice.x, slice.y, slice.z"
147   //   rewrites: []
148   // }
149   struct Node {
150     std::string name;
151     bool include_traceback_header = false;
152     uint32_t line = 1;
153     uint32_t col = 1;
154 
155     // The original SQL string used to create this node.
156     std::string original_sql;
157 
158     // The list of rewrites which are applied to |original_sql| ordered by the
159     // offsets.
160     std::vector<Rewrite> rewrites;
161 
162     // The SQL string which is the result of applying |rewrites| to
163     // |original_sql|. See |SqlSource::ApplyRewrites| for details on how this is
164     // computed.
165     std::string rewritten_sql;
166 
167     // Returns the "traceback" for this node and all recursive nodes. See
168     // |SqlSource::AsTraceback| for details.
169     std::string AsTraceback(uint32_t rewritten_offset) const;
170 
171     // Returns the "traceback" for this node only. See |SqlSource::AsTraceback|
172     // for details.
173     std::string SelfTraceback(uint32_t rewritten_offset,
174                               uint32_t original_offset) const;
175 
176     Node Substr(uint32_t rewritten_offset, uint32_t rewritten_len) const;
177 
IsRewrittenNode178     bool IsRewritten() const {
179       PERFETTO_CHECK(rewrites.empty() == (original_sql == rewritten_sql));  // Invariant: no rewrites <=> SQL unchanged.
180       return !rewrites.empty();
181     }
182 
183     // Given a |rewritten_offset| for this node, returns the offset into the
184     // |original_sql| which matches that |rewritten_offset|.
185     //
186     // IMPORTANT: if |rewritten_offset| is *inside* a rewrite, the original
187     // offset will point to the *start of the rewrite*. For example, if
188     // we have:
189     //   original_sql: "SELECT foo!(a) FROM slice a"
190     //   rewritten_sql: "SELECT a.x, a.y FROM slice a"
191     //   rewrites: [
192     //     {
193     //       original_sql_start: 7,
194     //       original_sql_end: 14,
195     //       rewritten_sql_start: 7,
196     //       rewritten_sql_end: 15,
197     //       node: foo
198     //     }
199     //   ]
200     // then:
201     //   RewrittenOffsetToOriginalOffset(7) == 7     // 7 = start of foo
202     //   RewrittenOffsetToOriginalOffset(14) == 7    // 7 = start of foo
203     //   RewrittenOffsetToOriginalOffset(15) == 14   // 14 = end of foo
204     //   RewrittenOffsetToOriginalOffset(16) == 15
205     uint32_t RewrittenOffsetToOriginalOffset(uint32_t rewritten_offset) const;
206 
207     // Given an |original_offset| for this node, returns the index of a
208     // rewrite whose original range contains |original_offset|.
209     // Returns std::nullopt if there is no such rewrite.
210     std::optional<uint32_t> RewriteForOriginalOffset(
211         uint32_t original_offset) const;
212   };
213 
214   // Defines a rewrite. See the documentation for |SqlSource::Node| for details
215   // on this.
216   struct Rewrite {
217     // The start and end offsets in |original_sql|.
218     uint32_t original_sql_start;
219     uint32_t original_sql_end;
220 
221     // The start and end offsets in |rewritten_sql|.
222     uint32_t rewritten_sql_start;
223     uint32_t rewritten_sql_end;
224 
225     // Node containing the SQL which replaces the segment of SQL in
226     // |original_sql|.
227     Node rewrite_node;
228   };
229 
230   SqlSource();
231   explicit SqlSource(Node);
232   SqlSource(std::string sql, std::string name, bool include_traceback_header);
233 
234   static std::string ApplyRewrites(const std::string&,
235                                    const std::vector<Rewrite>&);
236 
237   Node root_;
238 };
239 
240 // Used to rewrite a SqlSource using SQL from other SqlSources.
241 class SqlSource::Rewriter {
242  public:
243   // Creates a Rewriter object which can be used to rewrite the SQL backing
244   // |source|.
245   //
246   // Note that rewrites of portions of the SQL which have already been rewritten
247   // are supported but *only in limited cases*. Specifically, the new rewrite
248   // must not cross the boundary of any existing rewrite.
249   //
250   // For example, if we have:
251   //   SqlSource {
252   //     original_sql: "SELECT foo!(a) FROM bar!(slice) a"
253   //     rewritten_sql: "SELECT a.x, a.y FROM (SELECT slice.x FROM slice) a"
254   //   }
255   // then the following are valid:
256   //   # Replaces "SELECT " with "INSERT ". Valid because it does not touch
257   //   # any rewrite.
258   //   Rewrite(0, 7, "INSERT ")
259   //
260   //   # Replaces "a.x, a." with "a.z, ". Valid because it only touches the
261   //   # contents of the existing "foo" rewrite.
262   //   Rewrite(7, 14, "a.z, ")
263   // while the following are invalid:
264   //   # Fails to replace "SELECT a" with "I". Invalid because it affects both
265   //   # non-rewritten source and the "foo" rewrite.
266   //   Rewrite(0, 8, "I")
267   //
268   //   # Fails to replace "a.x, a.y FROM (" with "(". Invalid because it affects
269   //   # the "foo" rewrite, non-rewritten source and the "bar" rewrite.
270   //   Rewrite(7, 22, "(")
271   explicit Rewriter(SqlSource source);
272 
273   // Replaces the SQL in |source.rewritten_sql| between |rewritten_start| and
274   // |rewritten_end| with the contents of |rewrite|.
275   //
276   // Note that calls to Rewrite must be monotonic and non-overlapping. i.e.
277   // if Rewrite(0, 10) is called, the next |rewritten_start| must be greater
278   // than or equal to 10.
279   //
280   // Note also that all offsets passed to this function correspond to offsets
281   // into |source.rewritten_sql|: past calls to rewrite do not affect future
282   // offsets.
283   void Rewrite(uint32_t rewritten_start,
284                uint32_t rewritten_end,
285                SqlSource rewrite);
286 
287   // Returns the rewritten SqlSource instance.
288   SqlSource Build() &&;
289 
290  private:
291   explicit Rewriter(Node);
292 
293   Node orig_;
294   std::vector<SqlSource::Rewriter> nested_;
295   std::vector<SqlSource::Rewrite> non_nested_;
296 };
297 
298 }  // namespace trace_processor
299 }  // namespace perfetto
300 
301 #endif  // SRC_TRACE_PROCESSOR_SQLITE_SQL_SOURCE_H_
302