1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_TRACE_PROCESSOR_SQLITE_SQL_SOURCE_H_ 18 #define SRC_TRACE_PROCESSOR_SQLITE_SQL_SOURCE_H_ 19 20 #include <cstdint> 21 #include <optional> 22 #include <string> 23 #include <string_view> 24 #include <tuple> 25 #include <vector> 26 27 #include "perfetto/base/logging.h" 28 29 namespace perfetto { 30 namespace trace_processor { 31 32 // An SQL string which retains knowledge of the source of the SQL (i.e. stdlib 33 // module, ExecuteQuery etc). It also supports "rewriting" parts or all of the 34 // SQL string with a different string which is useful in cases where SQL is 35 // substituted such as macros or function inlining. 36 class SqlSource { 37 public: 38 class Rewriter; 39 40 // Creates a SqlSource instance wrapping SQL passed to 41 // |TraceProcessor::ExecuteQuery|. 42 static SqlSource FromExecuteQuery(std::string sql); 43 44 // Creates a SqlSource instance wrapping SQL executed when running a metric. 45 static SqlSource FromMetric(std::string sql, const std::string& metric_file); 46 47 // Creates a SqlSource instance wrapping SQL executed when running a metric 48 // file (i.e. with RUN_METRIC). 49 static SqlSource FromMetricFile(std::string sql, 50 const std::string& metric_file); 51 52 // Creates a SqlSource instance wrapping SQL executed when including a module. 53 static SqlSource FromModuleInclude(std::string sql, 54 const std::string& module); 55 56 // Creates a SqlSource instance wrapping SQL which is an internal 57 // implementation detail of trace processor. 58 static SqlSource FromTraceProcessorImplementation(std::string sql); 59 60 // Returns this SqlSource instance as a string which can be appended as a 61 // "traceback" frame to an error message. Callers should pass an |offset| 62 // parameter which indicates the exact location of the error in the SQL 63 // string. 0 and |sql().size()| are both valid offset positions and correspond 64 // to the start and end of the source respectively. 65 // 66 // Specifically, this string will include: 67 // a) context about the source of the SQL 68 // b) line and column number of the error 69 // c) a snippet of the SQL and a caret (^) character pointing to the location 70 // of the error. 71 std::string AsTraceback(uint32_t offset) const; 72 73 // Same as |AsTraceback| but for offsets which come from SQLite instead of 74 // from trace processor tokenization or parsing. 75 std::string AsTracebackForSqliteOffset(std::optional<uint32_t> offset) const; 76 77 // Creates a SqlSource instance with the SQL taken as a substring starting 78 // at |offset| with |len| characters. 79 SqlSource Substr(uint32_t offset, uint32_t len) const; 80 81 // Rewrites the SQL backing |this| to SQL from |source| ignoring any existing 82 // rewrites in |this|. 83 // 84 // This is useful when PerfettoSQL statements are transpiled into SQLite 85 // statements but we want to preserve the context of the original statement. 86 SqlSource RewriteAllIgnoreExisting(SqlSource source) const; 87 88 // Returns the SQL string backing this SqlSource instance; sql()89 const std::string& sql() const { return root_.rewritten_sql; } 90 91 // Returns the original SQL string backing this SqlSource instance; original_sql()92 const std::string& original_sql() const { return root_.original_sql; } 93 94 // Returns whether this SqlSource has been rewritten. IsRewritten()95 bool IsRewritten() const { return root_.IsRewritten(); } 96 97 private: 98 struct Rewrite; 99 100 // Represents a tree of SQL rewrites, preserving the source for each rewrite. 101 // 102 // Suppose that we have the following situation: 103 // User: `SELECT foo!(a) FROM bar!(slice) a` 104 // foo : `$1.x, $1.y` 105 // bar : `(SELECT baz!($1) FROM $1)` 106 // baz : `$1.x, $1.y, $1.z` 107 // 108 // We want to expand this to 109 // ```SELECT a.x, a.y FROM (SELECT slice.x, slice.y, slice.z FROM slice) a``` 110 // while retaining information about the source of the rewrite. 111 // 112 // For example, the string `a.x, a.y` came from foo, `slice.x, slice.y, 113 // slice.z` came from bar, which itself recursively came from baz etc. 114 // 115 // The purpose of this class is to keep track of the information required for 116 // this "tree" of rewrites (i.e. expansions). In the example above, the tree 117 // would look as follows: 118 // User 119 // / | 120 // foo bar 121 // / 122 // baz 123 // 124 // The properties in each of these nodes is as follows: 125 // User { 126 // original_sql: "SELECT foo!(a) FROM bar!(slice) a" 127 // rewritten_sql: "SELECT a.x, a.y FROM (SELECT slice.x, slice.y, slice.z 128 // FROM slice) a" 129 // rewrites: [ 130 // {original_sql_start: 7, original_sql_end: 14, node: foo}, 131 // {original_sql_start: 20, original_sql_end: 31, node: bar}] 132 // ] 133 // } 134 // foo { 135 // original_sql: "$1.x, $1.y" 136 // rewritten_sql: "a.x, a.y" 137 // rewrites: [] 138 // } 139 // bar { 140 // original_sql: "(SELECT baz!($1) FROM $1 LIMIT 1)" 141 // rewritten_sql: "(SELECT slice.x, slice.y, slice.z FROM slice)" 142 // rewrites: [{original_sql_start: 8, original_sql_end: 16, node: baz}] 143 // } 144 // baz { 145 // original_sql = "$1.x, $1.y, $1.z" 146 // rewritten_sql = "slice.x, slice.y, slice.z" 147 // rewrites: [] 148 // } 149 struct Node { 150 std::string name; 151 bool include_traceback_header = false; 152 uint32_t line = 1; 153 uint32_t col = 1; 154 155 // The original SQL string used to create this node. 156 std::string original_sql; 157 158 // The list of rewrites which are applied to |original_sql| ordered by the 159 // offsets. 160 std::vector<Rewrite> rewrites; 161 162 // The SQL string which is the result of applying |rewrites| to 163 // |original_sql|. See |SqlSource::ApplyRewrites| for details on how this is 164 // computed. 165 std::string rewritten_sql; 166 167 // Returns the "traceback" for this node and all recursive nodes. See 168 // |SqlSource::AsTraceback| for details. 169 std::string AsTraceback(uint32_t rewritten_offset) const; 170 171 // Returns the "traceback" for this node only. See |SqlSource::AsTraceback| 172 // for details. 173 std::string SelfTraceback(uint32_t rewritten_offset, 174 uint32_t original_offset) const; 175 176 Node Substr(uint32_t rewritten_offset, uint32_t rewritten_len) const; 177 IsRewrittenNode178 bool IsRewritten() const { 179 PERFETTO_CHECK(rewrites.empty() == (original_sql == rewritten_sql)); 180 return !rewrites.empty(); 181 } 182 183 // Given a |rewritten_offset| for this node, returns the offset into the 184 // |original_sql| which matches that |rewritten_offset|. 185 // 186 // IMPORTANT: if |rewritten_offset| is *inside* a rewrite, the original 187 // offset will point to the *start of the rewrite*. For example, if 188 // we have: 189 // original_sql: "SELECT foo!(a) FROM slice a" 190 // rewritten_sql: "SELECT a.x, a.y FROM slice a" 191 // rewrites: [ 192 // { 193 // original_sql_start: 7, 194 // original_sql_end: 14, 195 // rewritten_sql_start: 7, 196 // rewritten_sql_end: 15, 197 // node: foo 198 // } 199 // ] 200 // then: 201 // RewrittenOffsetToOriginalOffset(7) == 7 // 7 = start of foo 202 // RewrittenOffsetToOriginalOffset(14) == 7 // 7 = start of foo 203 // RewrittenOffsetToOriginalOffset(15) == 14 // 14 = end of foo 204 // RewrittenOffsetToOriginalOffset(16) == 15 205 uint32_t RewrittenOffsetToOriginalOffset(uint32_t rewritten_offset) const; 206 207 // Given an |original_offset| for this node, returns the index of a 208 // rewrite whose original range contains |original_offset|. 209 // Returns std::nullopt if there is no such rewrite. 210 std::optional<uint32_t> RewriteForOriginalOffset( 211 uint32_t original_offset) const; 212 }; 213 214 // Defines a rewrite. See the documentation for |SqlSource::Node| for details 215 // on this. 216 struct Rewrite { 217 // The start and end offsets in |original_sql|. 218 uint32_t original_sql_start; 219 uint32_t original_sql_end; 220 221 // The start and end offsets in |rewritten_sql|. 222 uint32_t rewritten_sql_start; 223 uint32_t rewritten_sql_end; 224 225 // Node containing the SQL which replaces the segment of SQL in 226 // |original_sql|. 227 Node rewrite_node; 228 }; 229 230 SqlSource(); 231 explicit SqlSource(Node); 232 SqlSource(std::string sql, std::string name, bool include_traceback_header); 233 234 static std::string ApplyRewrites(const std::string&, 235 const std::vector<Rewrite>&); 236 237 Node root_; 238 }; 239 240 // Used to rewrite a SqlSource using SQL from other SqlSources. 241 class SqlSource::Rewriter { 242 public: 243 // Creates a Rewriter object which can be used to rewrite the SQL backing 244 // |source|. 245 // 246 // Note that rewrites of portions of the SQL which have already been rewritten 247 // is supported but *only in limited cases*. Specifically, the new rewrite 248 // must not cross the boundary of any existing rewrite. 249 // 250 // For example, if we have: 251 // SqlSource { 252 // original_sql: "SELECT foo!(a) FROM bar!(slice) a" 253 // rewritten_sql: "SELECT a.x, a.y FROM (SELECT slice.x FROM slice) a" 254 // } 255 // then the following are valid: 256 // # Replaces "SELECT " with "INSERT ". Valid because it does not touch 257 // # any rewrite. 258 // Rewrite(0, 7, "INSERT ") 259 // 260 // # Replaces "a.x, a." with "a.z, ". Valid because it only touches the 261 // # contents of the existing "foo" rewrite. 262 // Rewrite(7, 14, "a.z, ") 263 // while the following are invalid: 264 // # Fails to replace "SELECT a" with "I". Invalid because it affects both 265 // # non-rewritten source and the "foo" rewrite. 266 // Rewrite(0, 8, "I") 267 // 268 // # Fails to replace "a.x, a.y FROM (" with "(". Invalid because it affects 269 // # the "foo" rewrite, non-rewritten source and the "bar" rewrite. 270 // Rewrite(7, 23, "(") 271 explicit Rewriter(SqlSource source); 272 273 // Replaces the SQL in |source.rewritten_sql| between |rewritten_start| and 274 // |rewritten_end| with the contents of |rewrite|. 275 // 276 // Note that calls to Rewrite must be monontonic and non-overlapping. i.e. 277 // if Rewrite(0, 10) is called, the next |rewritten_end| must be greater than 278 // or equal to 10. 279 // 280 // Note also that all offsets passed to this function correspond to offsets 281 // into |source.rewritten_sql|: past calls to rewrite do not affect future 282 // offsets. 283 void Rewrite(uint32_t rewritten_start, 284 uint32_t rewritten_end, 285 SqlSource rewrite); 286 287 // Returns the rewritten SqlSource instance. 288 SqlSource Build() &&; 289 290 private: 291 explicit Rewriter(Node); 292 293 Node orig_; 294 std::vector<SqlSource::Rewriter> nested_; 295 std::vector<SqlSource::Rewrite> non_nested_; 296 }; 297 298 } // namespace trace_processor 299 } // namespace perfetto 300 301 #endif // SRC_TRACE_PROCESSOR_SQLITE_SQL_SOURCE_H_ 302