xref: /aosp_15_r20/external/perfetto/src/trace_processor/sqlite/sql_source.cc (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/sqlite/sql_source.h"
18 
19 #include <sqlite3.h>
20 #include <algorithm>
21 #include <cstddef>
22 #include <cstdint>
23 #include <iterator>
24 #include <limits>
25 #include <optional>
26 #include <string>
27 #include <string_view>
28 #include <utility>
29 #include <vector>
30 
31 #include "perfetto/base/logging.h"
32 #include "perfetto/ext/base/string_utils.h"
33 
34 #if SQLITE_VERSION_NUMBER < 3041002
35 // There is a bug in pre-3.41.2 versions of SQLite where sqlite3_error_offset
36 // can return an offset out of bounds. Make it a hard compiler error to prevent
37 // us from hitting this bug.
38 #error "SQLite version is too old."
39 #endif
40 
41 namespace perfetto::trace_processor {
42 
43 namespace {
44 
GetLineAndColumnForOffset(const std::string & sql,uint32_t line,uint32_t column,uint32_t offset)45 std::pair<uint32_t, uint32_t> GetLineAndColumnForOffset(const std::string& sql,
46                                                         uint32_t line,
47                                                         uint32_t column,
48                                                         uint32_t offset) {
49   if (offset == 0) {
50     return std::make_pair(line, column);
51   }
52 
53   const char* new_start = sql.c_str() + offset;
54   size_t prev_nl = sql.rfind('\n', offset - 1);
55   int64_t nl_count = std::count(sql.c_str(), new_start, '\n');
56   PERFETTO_DCHECK((nl_count == 0) == (prev_nl == std::string_view::npos));
57 
58   if (prev_nl == std::string::npos) {
59     return std::make_pair(line + static_cast<uint32_t>(nl_count),
60                           column + static_cast<uint32_t>(offset));
61   }
62 
63   int64_t new_column = std::distance(sql.c_str() + prev_nl, new_start);
64   return std::make_pair(line + static_cast<uint32_t>(nl_count),
65                         static_cast<uint32_t>(new_column));
66 }
67 
SqlContextAndCaretPos(const std::string & sql,uint32_t offset)68 std::pair<std::string, size_t> SqlContextAndCaretPos(const std::string& sql,
69                                                      uint32_t offset) {
70   PERFETTO_DCHECK(offset <= sql.size());
71 
72   // Go back 128 characters, until the start of the string or the start of the
73   // line (which we encounter first).
74   size_t start_idx = offset - std::min<size_t>(128ul, offset);
75   if (offset > 0) {
76     size_t prev_nl = sql.rfind('\n', offset - 1);
77     if (prev_nl != std::string::npos) {
78       start_idx = std::max(prev_nl + 1, start_idx);
79     }
80   }
81 
82   // Go forward 128 characters, to the end of the string or the end of the
83   // line (which we encounter first).
84   size_t end_idx = std::min<size_t>(offset + 128ul, sql.size());
85   size_t next_nl = sql.find('\n', offset);
86   if (next_nl != std::string::npos) {
87     end_idx = std::min(next_nl, end_idx);
88   }
89   return std::make_pair(sql.substr(start_idx, end_idx - start_idx),
90                         offset - start_idx);
91 }
92 
93 }  // namespace
94 
95 SqlSource::SqlSource() = default;
SqlSource(Node node)96 SqlSource::SqlSource(Node node) : root_(std::move(node)) {}
97 
SqlSource(std::string sql,std::string name,bool include_traceback_header)98 SqlSource::SqlSource(std::string sql,
99                      std::string name,
100                      bool include_traceback_header) {
101   root_.name = std::move(name);
102   root_.original_sql = sql;
103   root_.rewritten_sql = std::move(sql);
104   root_.include_traceback_header = include_traceback_header;
105 }
106 
FromExecuteQuery(std::string sql)107 SqlSource SqlSource::FromExecuteQuery(std::string sql) {
108   return {std::move(sql), "File \"stdin\"", true};
109 }
110 
FromMetric(std::string sql,const std::string & name)111 SqlSource SqlSource::FromMetric(std::string sql, const std::string& name) {
112   return {std::move(sql), "Metric \"" + name + "\"", true};
113 }
114 
FromMetricFile(std::string sql,const std::string & name)115 SqlSource SqlSource::FromMetricFile(std::string sql, const std::string& name) {
116   return {std::move(sql), "Metric file \"" + name + "\"", false};
117 }
118 
FromModuleInclude(std::string sql,const std::string & module)119 SqlSource SqlSource::FromModuleInclude(std::string sql,
120                                        const std::string& module) {
121   return {std::move(sql), "Module include \"" + module + "\"", false};
122 }
123 
FromTraceProcessorImplementation(std::string sql)124 SqlSource SqlSource::FromTraceProcessorImplementation(std::string sql) {
125   return {std::move(sql), "Trace Processor Internal", false};
126 }
127 
AsTraceback(uint32_t offset) const128 std::string SqlSource::AsTraceback(uint32_t offset) const {
129   return root_.AsTraceback(offset);
130 }
131 
AsTracebackForSqliteOffset(std::optional<uint32_t> opt_offset) const132 std::string SqlSource::AsTracebackForSqliteOffset(
133     std::optional<uint32_t> opt_offset) const {
134   uint32_t offset = opt_offset.value_or(0);
135   // It's possible for SQLite in rare cases to return an out-of-bounds
136   // offset. This has been reported upstream; for now workaround this
137   // by using zero as the offset if it's out of bounds.
138   if (offset > sql().size()) {
139     offset = 0;
140   }
141   return AsTraceback(offset);
142 }
143 
Substr(uint32_t offset,uint32_t len) const144 SqlSource SqlSource::Substr(uint32_t offset, uint32_t len) const {
145   SqlSource source;
146   source.root_ = root_.Substr(offset, len);
147   return source;
148 }
149 
RewriteAllIgnoreExisting(SqlSource source) const150 SqlSource SqlSource::RewriteAllIgnoreExisting(SqlSource source) const {
151   // Reset any rewrites.
152   SqlSource copy = *this;
153   copy.root_.rewritten_sql = copy.root_.original_sql;
154   copy.root_.rewrites.clear();
155 
156   SqlSource::Rewriter rewriter(std::move(copy));
157   rewriter.Rewrite(0, static_cast<uint32_t>(root_.original_sql.size()),
158                    std::move(source));
159   return std::move(rewriter).Build();
160 }
161 
ApplyRewrites(const std::string & original_sql,const std::vector<Rewrite> & rewrites)162 std::string SqlSource::ApplyRewrites(const std::string& original_sql,
163                                      const std::vector<Rewrite>& rewrites) {
164   std::string sql;
165   uint32_t prev_idx = 0;
166   for (const auto& rewrite : rewrites) {
167     PERFETTO_CHECK(prev_idx <= rewrite.original_sql_start);
168     sql.append(
169         original_sql.substr(prev_idx, rewrite.original_sql_start - prev_idx));
170     sql.append(rewrite.rewrite_node.rewritten_sql);
171     prev_idx = rewrite.original_sql_end;
172   }
173   sql.append(original_sql.substr(prev_idx, original_sql.size() - prev_idx));
174   return sql;
175 }
176 
AsTraceback(uint32_t rewritten_offset) const177 std::string SqlSource::Node::AsTraceback(uint32_t rewritten_offset) const {
178   PERFETTO_CHECK(rewritten_offset <= rewritten_sql.size());
179   uint32_t original_offset = RewrittenOffsetToOriginalOffset(rewritten_offset);
180   std::string res = SelfTraceback(rewritten_offset, original_offset);
181   if (auto opt_idx = RewriteForOriginalOffset(original_offset); opt_idx) {
182     const Rewrite& rewrite = rewrites[*opt_idx];
183     PERFETTO_CHECK(rewritten_offset >= rewrite.rewritten_sql_start);
184     PERFETTO_CHECK(rewritten_offset < rewrite.rewritten_sql_end);
185     res.append(rewrite.rewrite_node.AsTraceback(rewritten_offset -
186                                                 rewrite.rewritten_sql_start));
187   }
188   return res;
189 }
190 
SelfTraceback(uint32_t rewritten_offset,uint32_t original_offset) const191 std::string SqlSource::Node::SelfTraceback(uint32_t rewritten_offset,
192                                            uint32_t original_offset) const {
193   PERFETTO_DCHECK(original_offset <= original_sql.size());
194   auto [o_context, o_caret_pos] =
195       SqlContextAndCaretPos(original_sql, original_offset);
196   std::string header;
197   if (include_traceback_header) {
198     if (!rewrites.empty()) {
199       auto [r_context, r_caret_pos] =
200           SqlContextAndCaretPos(rewritten_sql, rewritten_offset);
201       std::string caret = std::string(r_caret_pos, ' ') + "^";
202       base::StackString<1024> str("Fully expanded statement\n  %s\n  %s\n",
203                                   r_context.c_str(), caret.c_str());
204       header.append(str.c_str());
205     }
206     header += "Traceback (most recent call last):\n";
207   }
208 
209   auto line_and_col =
210       GetLineAndColumnForOffset(original_sql, line, col, original_offset);
211   std::string caret = std::string(o_caret_pos, ' ') + "^";
212   base::StackString<1024> str("%s  %s line %u col %u\n    %s\n    %s\n",
213                               header.c_str(), name.c_str(), line_and_col.first,
214                               line_and_col.second, o_context.c_str(),
215                               caret.c_str());
216   return str.ToStdString();
217 }
218 
Substr(uint32_t offset,uint32_t len) const219 SqlSource::Node SqlSource::Node::Substr(uint32_t offset, uint32_t len) const {
220   uint32_t offset_end = offset + len;
221   PERFETTO_CHECK(offset_end <= rewritten_sql.size());
222 
223   uint32_t original_offset_start = RewrittenOffsetToOriginalOffset(offset);
224   uint32_t original_offset_end = RewrittenOffsetToOriginalOffset(offset_end);
225   std::vector<Rewrite> new_rewrites;
226   for (const Rewrite& rewrite : rewrites) {
227     if (offset >= rewrite.rewritten_sql_end) {
228       continue;
229     }
230     if (offset_end < rewrite.rewritten_sql_start) {
231       break;
232     }
233     // Special case: when the end of the substr is in the middle of a rewrite,
234     // we actually want to capture the original SQL up to the end of the
235     // rewrite, not just to the start as |ChildRewrittenOffset| returns.
236     if (offset_end < rewrite.rewritten_sql_end) {
237       original_offset_end = rewrite.original_sql_end;
238     }
239     uint32_t bounded_start = std::max(offset, rewrite.rewritten_sql_start);
240     uint32_t bounded_end = std::min(offset_end, rewrite.rewritten_sql_end);
241 
242     uint32_t nested_start = bounded_start - rewrite.rewritten_sql_start;
243     uint32_t nested_len = bounded_end - bounded_start;
244 
245     new_rewrites.push_back(Rewrite{
246         rewrite.original_sql_start - original_offset_start,
247         rewrite.original_sql_end - original_offset_start,
248         bounded_start - offset,
249         bounded_end - offset,
250         rewrite.rewrite_node.Substr(nested_start, nested_len),
251     });
252   }
253   std::string new_original = original_sql.substr(
254       original_offset_start, original_offset_end - original_offset_start);
255   std::string new_rewritten = rewritten_sql.substr(offset, len);
256   PERFETTO_DCHECK(ApplyRewrites(new_original, new_rewrites) == new_rewritten);
257 
258   auto line_and_col =
259       GetLineAndColumnForOffset(original_sql, line, col, original_offset_start);
260   return Node{
261       name,
262       include_traceback_header,
263       line_and_col.first,
264       line_and_col.second,
265       new_original,
266       std::move(new_rewrites),
267       new_rewritten,
268   };
269 }
270 
RewrittenOffsetToOriginalOffset(uint32_t rewritten_offset) const271 uint32_t SqlSource::Node::RewrittenOffsetToOriginalOffset(
272     uint32_t rewritten_offset) const {
273   uint32_t remaining = rewritten_offset;
274   for (const Rewrite& rewrite : rewrites) {
275     if (rewritten_offset >= rewrite.rewritten_sql_end) {
276       remaining -= rewrite.rewritten_sql_end - rewrite.rewritten_sql_start;
277       remaining += rewrite.original_sql_end - rewrite.original_sql_start;
278       continue;
279     }
280     if (rewritten_offset < rewrite.rewritten_sql_start) {
281       break;
282     }
283     // IMPORTANT: if the rewritten offset is anywhere inside a rewrite, we just
284     // map the original offset to point to the start of the rewrite. This is
285     // the only sane way we can handle arbitrary transformations of the
286     // original sql.
287     return rewrite.original_sql_start;
288   }
289   return remaining;
290 }
291 
RewriteForOriginalOffset(uint32_t original_offset) const292 std::optional<uint32_t> SqlSource::Node::RewriteForOriginalOffset(
293     uint32_t original_offset) const {
294   for (uint32_t i = 0; i < rewrites.size(); ++i) {
295     if (original_offset >= rewrites[i].original_sql_start &&
296         original_offset < rewrites[i].original_sql_end) {
297       return i;
298     }
299   }
300   return std::nullopt;
301 }
302 
Rewriter(SqlSource source)303 SqlSource::Rewriter::Rewriter(SqlSource source)
304     : Rewriter(std::move(source.root_)) {}
Rewriter(Node source)305 SqlSource::Rewriter::Rewriter(Node source) : orig_(std::move(source)) {
306   // Note: it's important that we *don't* move out of |orig_| here as we want to
307   // be able to access the untouched offsets through
308   // calls to |RewrittenOffsetToOriginalOffset| etc.
309   for (const SqlSource::Rewrite& rewrite : orig_.rewrites) {
310     nested_.push_back(SqlSource::Rewriter(rewrite.rewrite_node));
311   }
312 }
313 
Rewrite(uint32_t rewritten_start,uint32_t rewritten_end,SqlSource source)314 void SqlSource::Rewriter::Rewrite(uint32_t rewritten_start,
315                                   uint32_t rewritten_end,
316                                   SqlSource source) {
317   PERFETTO_CHECK(rewritten_start <= rewritten_end);
318   PERFETTO_CHECK(rewritten_end <= orig_.rewritten_sql.size());
319 
320   uint32_t original_start =
321       orig_.RewrittenOffsetToOriginalOffset(rewritten_start);
322   std::optional<uint32_t> maybe_rewrite =
323       orig_.RewriteForOriginalOffset(original_start);
324   if (maybe_rewrite) {
325     const SqlSource::Rewrite& rewrite = orig_.rewrites[*maybe_rewrite];
326     nested_[*maybe_rewrite].Rewrite(
327         rewritten_start - rewrite.rewritten_sql_start,
328         rewritten_end - rewrite.rewritten_sql_start, std::move(source));
329   } else {
330     uint32_t original_end =
331         orig_.RewrittenOffsetToOriginalOffset(rewritten_end);
332     non_nested_.push_back(SqlSource::Rewrite{
333         original_start,
334         original_end,
335         std::numeric_limits<uint32_t>::max(),  // Dummy, corrected in |Build|.
336         std::numeric_limits<uint32_t>::max(),  // Dummy, corrected in |Build|.
337         std::move(source.root_),
338     });
339   }
340 }
341 
Build()342 SqlSource SqlSource::Rewriter::Build() && {
343   // Phase 1: finalize all the nested rewrites and merge both nested and
344   // non-nested into a single vector.
345   std::vector<SqlSource::Rewrite> all_rewrites = std::move(non_nested_);
346   for (uint32_t i = 0; i < nested_.size(); ++i) {
347     const SqlSource::Rewrite orig_rewrite = orig_.rewrites[i];
348     all_rewrites.push_back(SqlSource::Rewrite{
349         orig_rewrite.original_sql_start,
350         orig_rewrite.original_sql_end,
351         std::numeric_limits<uint32_t>::max(),  // Dummy, corrected in phase 3.
352         std::numeric_limits<uint32_t>::max(),  // Dummy, corrected in phase 3.
353         std::move(nested_[i]).Build().root_,
354     });
355   }
356 
357   // Phase 2: sort the new rewrite vector by original offset and verify that the
358   // original offsets are monotonic and non-overlapping.
359   std::sort(all_rewrites.begin(), all_rewrites.end(),
360             [](const SqlSource::Rewrite& a, const SqlSource::Rewrite& b) {
361               return a.original_sql_start < b.original_sql_start;
362             });
363   for (uint32_t i = 1; i < all_rewrites.size(); ++i) {
364     PERFETTO_CHECK(all_rewrites[i - 1].original_sql_end <=
365                    all_rewrites[i].original_sql_start);
366   }
367 
368   // Phase 3: compute the new rewritten offsets and assign them to the rewrites.
369   // Also unset the traceback flag for all rewrites.
370   uint32_t original_bytes_in_rewrites = 0;
371   uint32_t rewritten_bytes_in_rewrites = 0;
372   for (SqlSource::Rewrite& rewrite : all_rewrites) {
373     uint32_t source_size =
374         static_cast<uint32_t>(rewrite.rewrite_node.rewritten_sql.size());
375 
376     rewrite.rewritten_sql_start = rewrite.original_sql_start +
377                                   rewritten_bytes_in_rewrites -
378                                   original_bytes_in_rewrites;
379     rewrite.rewritten_sql_end = rewrite.rewritten_sql_start + source_size;
380     rewrite.rewrite_node.include_traceback_header = false;
381 
382     original_bytes_in_rewrites +=
383         rewrite.original_sql_end - rewrite.original_sql_start;
384     rewritten_bytes_in_rewrites += source_size;
385   }
386 
387   // Phase 4: update the node to reflect the new rewrites.
388   orig_.rewrites = std::move(all_rewrites);
389   orig_.rewritten_sql = ApplyRewrites(orig_.original_sql, orig_.rewrites);
390   return SqlSource(std::move(orig_));
391 }
392 
393 }  // namespace perfetto::trace_processor
394