1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_PROCESSOR_PERFETTO_SQL_INTRINSICS_OPERATORS_SPAN_JOIN_OPERATOR_H_
18 #define SRC_TRACE_PROCESSOR_PERFETTO_SQL_INTRINSICS_OPERATORS_SPAN_JOIN_OPERATOR_H_
19 
20 #include <cstddef>
21 #include <cstdint>
22 #include <limits>
23 #include <optional>
24 #include <string>
25 #include <utility>
26 #include <vector>
27 
28 #include "perfetto/base/logging.h"
29 #include "perfetto/base/status.h"
30 #include "perfetto/ext/base/flat_hash_map.h"
31 #include "perfetto/ext/base/string_splitter.h"
32 #include "perfetto/ext/base/string_utils.h"
33 #include "perfetto/trace_processor/basic_types.h"
34 #include "src/trace_processor/sqlite/bindings/sqlite_module.h"
35 #include "src/trace_processor/sqlite/module_lifecycle_manager.h"
36 #include "src/trace_processor/sqlite/sqlite_engine.h"
37 
38 namespace perfetto::trace_processor {
39 
40 class PerfettoSqlEngine;
41 struct SpanJoinOperatorModule;
42 
43 // Implements the SPAN JOIN operation between two tables on a particular column.
44 //
45 // Span:
46 // A span is a row with a timestamp and a duration. It is used to model
47 // operations which run for a particular *span* of time.
48 //
49 // We draw spans like so (time on the x-axis):
50 // start of span->[ time where opertion is running ]<- end of span
51 //
52 // Multiple spans can happen in parallel:
53 // [      ]
54 //    [        ]
55 //   [                    ]
56 //  [ ]
57 //
58 // The above for example, models scheduling activity on a 4-core computer for a
59 // short period of time.
60 //
61 // Span join:
62 // The span join operation can be thought of as the intersection of span tables.
63 // That is, the join table has a span for each pair of spans in the child tables
64 // where the spans overlap. Because many spans are possible in parallel, an
65 // extra metadata column (labelled the "join column") is used to distinguish
66 // between the spanned tables.
67 //
68 // For a given join key suppose these were the two span tables:
69 // Table 1:   [        ]              [      ]         [ ]
70 // Table 2:          [      ]            [  ]           [      ]
71 // Output :          [ ]                 [  ]           []
72 //
73 // All other columns apart from timestamp (ts), duration (dur) and the join key
74 // are passed through unchanged.
75 struct SpanJoinOperatorModule : public sqlite::Module<SpanJoinOperatorModule> {
76  public:
77   static constexpr uint32_t kSourceGeqOpCode =
78       SQLITE_INDEX_CONSTRAINT_FUNCTION + 1;
79 
80   struct State;
81 
82   // Enum indicating whether the queries on the two inner tables should
83   // emit shadows.
84   enum class EmitShadowType {
85     // Used when the table should emit all shadow slices (both present and
86     // missing partition shadows).
87     kAll,
88 
89     // Used when the table should only emit shadows for partitions which are
90     // present.
91     kPresentPartitionOnly,
92 
93     // Used when the table should emit no shadow slices.
94     kNone,
95   };
96 
97   // Parsed version of a table descriptor.
98   struct TableDescriptor {
99     static base::Status Parse(const std::string& raw_descriptor,
100                               TableDescriptor* descriptor);
101 
IsPartitionedSpanJoinOperatorModule::TableDescriptor102     bool IsPartitioned() const { return !partition_col.empty(); }
103 
104     std::string name;
105     std::string partition_col;
106   };
107 
108   // Contains the definition of the child tables.
109   class TableDefinition {
110    public:
111     TableDefinition() = default;
112 
113     TableDefinition(std::string name,
114                     std::string partition_col,
115                     std::vector<std::pair<SqlValue::Type, std::string>> cols,
116                     EmitShadowType emit_shadow_type,
117                     uint32_t ts_idx,
118                     uint32_t dur_idx,
119                     uint32_t partition_idx);
120 
121     static base::Status Create(PerfettoSqlEngine* engine,
122                                const TableDescriptor& desc,
123                                EmitShadowType emit_shadow_type,
124                                TableDefinition* defn);
125 
126     // Creates an SQL query from the constraints and index,
127     std::string CreateSqlQuery(base::StringSplitter&,
128                                sqlite3_value** argv) const;
129 
130     // Creates the section of the "CREATE TABLE" corresponding to this
131     // definition.
132     std::string CreateVtabCreateTableSection() const;
133 
134     // Returns whether this table should emit present partition shadow slices.
ShouldEmitPresentPartitionShadowSpanJoinOperatorModule135     bool ShouldEmitPresentPartitionShadow() const {
136       return emit_shadow_type_ == EmitShadowType::kAll ||
137              emit_shadow_type_ == EmitShadowType::kPresentPartitionOnly;
138     }
139 
140     // Returns whether this table should emit missing partition shadow slices.
ShouldEmitMissingPartitionShadowSpanJoinOperatorModule141     bool ShouldEmitMissingPartitionShadow() const {
142       return emit_shadow_type_ == EmitShadowType::kAll;
143     }
144 
145     // Returns whether the table is partitioned.
IsPartitionedSpanJoinOperatorModule146     bool IsPartitioned() const { return !partition_col_.empty(); }
147 
nameSpanJoinOperatorModule148     const std::string& name() const { return name_; }
partition_colSpanJoinOperatorModule149     const std::string& partition_col() const { return partition_col_; }
columnsSpanJoinOperatorModule150     const std::vector<std::pair<SqlValue::Type, std::string>>& columns() const {
151       return cols_;
152     }
153 
ts_idxSpanJoinOperatorModule154     uint32_t ts_idx() const { return ts_idx_; }
dur_idxSpanJoinOperatorModule155     uint32_t dur_idx() const { return dur_idx_; }
partition_idxSpanJoinOperatorModule156     uint32_t partition_idx() const { return partition_idx_; }
157 
158    private:
159     EmitShadowType emit_shadow_type_ = EmitShadowType::kNone;
160 
161     std::string name_;
162     std::string partition_col_;
163     std::vector<std::pair<SqlValue::Type, std::string>> cols_;
164 
165     uint32_t ts_idx_ = std::numeric_limits<uint32_t>::max();
166     uint32_t dur_idx_ = std::numeric_limits<uint32_t>::max();
167     uint32_t partition_idx_ = std::numeric_limits<uint32_t>::max();
168   };
169 
170   // Stores information about a single subquery into one of the two child
171   // tables.
172   //
173   // This class is implemented as a state machine which steps from one slice to
174   // the next.
175   class Query {
176    public:
177     // Enum encoding the current state of the query in the state machine.
178     enum class State {
179       // Encodes that the current slice is a real slice (i.e. comes directly
180       // from the cursor).
181       kReal,
182 
183       // Encodes that the current slice is on a partition for which there is a
184       // real slice present.
185       kPresentPartitionShadow,
186 
187       // Encodes that the current slice is on a paritition(s) for which there is
188       // no real slice for those partition(s).
189       kMissingPartitionShadow,
190 
191       // Encodes that this query has reached the end.
192       kEof,
193     };
194 
195     Query(SpanJoinOperatorModule::State*, const TableDefinition*);
196     virtual ~Query();
197 
198     Query(Query&&) noexcept = default;
199     Query& operator=(Query&&) = default;
200 
201     enum class InitialEofBehavior {
202       kTreatAsEof,
203       kTreatAsMissingPartitionShadow
204     };
205 
206     // Initializes the query with the given constraints and query parameters.
207     base::Status Initialize(
208         std::string sql,
209         InitialEofBehavior eof_behavior = InitialEofBehavior::kTreatAsEof);
210 
211     // Forwards the query to the next valid slice.
212     base::Status Next();
213 
214     // Rewinds the query to the first valid slice
215     // This is used in the mixed partitioning case where the query with no
216     // partitions is rewound to the start on every new partition.
217     base::Status Rewind();
218 
219     // Reports the column at the given index to given context.
220     void ReportSqliteResult(sqlite3_context* context, size_t index);
221 
222     // Returns whether the cursor has reached eof.
IsEofSpanJoinOperatorModule223     bool IsEof() const { return state_ == State::kEof; }
224 
225     // Returns whether the current slice pointed to is a real slice.
IsRealSpanJoinOperatorModule226     bool IsReal() const { return state_ == State::kReal; }
227 
228     // Returns the first partition this slice covers (for real/single partition
229     // shadows, this is the same as partition()).
230     // This partition encodes a [start, end] (closed at start and at end) range
231     // of partitions which works as the partitions are integers.
FirstPartitionSpanJoinOperatorModule232     int64_t FirstPartition() const {
233       PERFETTO_DCHECK(!IsEof());
234       return IsMissingPartitionShadow() ? missing_partition_start_
235                                         : partition();
236     }
237 
238     // Returns the last partition this slice covers (for real/single partition
239     // shadows, this is the same as partition()).
240     // This partition encodes a [start, end] (closed at start and at end) range
241     // of partitions which works as the partitions are integers.
LastPartitionSpanJoinOperatorModule242     int64_t LastPartition() const {
243       PERFETTO_DCHECK(!IsEof());
244       return IsMissingPartitionShadow() ? missing_partition_end_ - 1
245                                         : partition();
246     }
247 
248     // Returns the end timestamp of this slice adjusted to ensure that -1
249     // duration slices always returns ts.
AdjustedTsEndSpanJoinOperatorModule250     int64_t AdjustedTsEnd() const {
251       PERFETTO_DCHECK(!IsEof());
252       return ts_end_ - ts() == -1 ? ts() : ts_end_;
253     }
254 
tsSpanJoinOperatorModule255     int64_t ts() const {
256       PERFETTO_DCHECK(!IsEof());
257       return ts_;
258     }
partitionSpanJoinOperatorModule259     int64_t partition() const {
260       PERFETTO_DCHECK(!IsEof() && defn_->IsPartitioned());
261       return partition_;
262     }
263 
raw_ts_endSpanJoinOperatorModule264     int64_t raw_ts_end() const {
265       PERFETTO_DCHECK(!IsEof());
266       return ts_end_;
267     }
268 
definitionSpanJoinOperatorModule269     const TableDefinition* definition() const { return defn_; }
270 
271    private:
272     Query(Query&) = delete;
273     Query& operator=(const Query&) = delete;
274 
275     // Returns whether the current slice pointed to is a valid slice.
276     bool IsValidSlice();
277 
278     // Forwards the query to the next valid slice.
279     base::Status FindNextValidSlice();
280 
281     // Advances the query state machine by one slice.
282     base::Status NextSliceState();
283 
284     // Forwards the cursor to point to the next real slice.
285     base::Status CursorNext();
286 
287     // Returns whether the current slice pointed to is a present partition
288     // shadow.
IsPresentPartitionShadowSpanJoinOperatorModule289     bool IsPresentPartitionShadow() const {
290       return state_ == State::kPresentPartitionShadow;
291     }
292 
293     // Returns whether the current slice pointed to is a missing partition
294     // shadow.
IsMissingPartitionShadowSpanJoinOperatorModule295     bool IsMissingPartitionShadow() const {
296       return state_ == State::kMissingPartitionShadow;
297     }
298 
299     // Returns whether the current slice pointed to is an empty shadow.
IsEmptyShadowSpanJoinOperatorModule300     bool IsEmptyShadow() const {
301       PERFETTO_DCHECK(!IsEof());
302       return (!IsReal() && ts_ == ts_end_) ||
303              (IsMissingPartitionShadow() &&
304               missing_partition_start_ == missing_partition_end_);
305     }
306 
CursorTsSpanJoinOperatorModule307     int64_t CursorTs() const {
308       PERFETTO_DCHECK(!cursor_eof_);
309       auto ts_idx = static_cast<int>(defn_->ts_idx());
310       return sqlite3_column_int64(stmt_->sqlite_stmt(), ts_idx);
311     }
312 
CursorDurSpanJoinOperatorModule313     int64_t CursorDur() const {
314       PERFETTO_DCHECK(!cursor_eof_);
315       auto dur_idx = static_cast<int>(defn_->dur_idx());
316       return sqlite3_column_int64(stmt_->sqlite_stmt(), dur_idx);
317     }
318 
CursorPartitionSpanJoinOperatorModule319     int64_t CursorPartition() const {
320       PERFETTO_DCHECK(!cursor_eof_);
321       PERFETTO_DCHECK(defn_->IsPartitioned());
322       auto partition_idx = static_cast<int>(defn_->partition_idx());
323       return sqlite3_column_int64(stmt_->sqlite_stmt(), partition_idx);
324     }
325 
326     State state_ = State::kMissingPartitionShadow;
327     bool cursor_eof_ = false;
328 
329     // Only valid when |state_| != kEof.
330     int64_t ts_ = 0;
331     int64_t ts_end_ = std::numeric_limits<int64_t>::max();
332 
333     // Only valid when |state_| == kReal or |state_| == kPresentPartitionShadow.
334     int64_t partition_ = std::numeric_limits<int64_t>::min();
335 
336     // Only valid when |state_| == kMissingPartitionShadow.
337     int64_t missing_partition_start_ = 0;
338     int64_t missing_partition_end_ = 0;
339 
340     std::string sql_query_;
341     std::optional<SqliteEngine::PreparedStatement> stmt_;
342 
343     const TableDefinition* defn_ = nullptr;
344     SpanJoinOperatorModule::State* in_state_ = nullptr;
345   };
346 
347   // Columns of the span operator table.
348   enum Column {
349     kTimestamp = 0,
350     kDuration = 1,
351     kPartition = 2,
352     // All other columns are dynamic depending on the joined tables.
353   };
354 
355   // Enum indicating the possible partitionings of the two tables in span join.
356   enum class PartitioningType {
357     // Used when both tables don't have a partition specified.
358     kNoPartitioning = 0,
359 
360     // Used when both tables have the same partition specified.
361     kSamePartitioning = 1,
362 
363     // Used when one table has a partition and the other table doesn't.
364     kMixedPartitioning = 2
365   };
366 
367   // Identifier for a column by index in a given table.
368   struct ColumnLocator {
369     const TableDefinition* defn;
370     size_t col_index;
371   };
372 
373   struct Context {
ContextSpanJoinOperatorModule::Context374     explicit Context(PerfettoSqlEngine* _engine) : engine(_engine) {}
375 
376     PerfettoSqlEngine* engine;
377     sqlite::ModuleStateManager<SpanJoinOperatorModule> manager;
378   };
379   struct State {
IsLeftJoinSpanJoinOperatorModule::State380     bool IsLeftJoin() const {
381       return base::CaseInsensitiveEqual(module_name, "span_left_join");
382     }
IsOuterJoinSpanJoinOperatorModule::State383     bool IsOuterJoin() const {
384       return base::CaseInsensitiveEqual(module_name, "span_outer_join");
385     }
386 
partition_colSpanJoinOperatorModule::State387     const std::string& partition_col() const {
388       return t1_defn.IsPartitioned() ? t1_defn.partition_col()
389                                      : t2_defn.partition_col();
390     }
391 
392     std::string GetNameForGlobalColumnIndex(const TableDefinition& defn,
393                                             int global_column);
394 
395     std::string BestIndexStrForDefinition(const sqlite3_index_info* info,
396                                           const TableDefinition& defn);
397 
398     void PopulateColumnLocatorMap(uint32_t);
399 
400     PerfettoSqlEngine* engine;
401     std::string module_name;
402     std::string create_table_stmt;
403     TableDefinition t1_defn;
404     TableDefinition t2_defn;
405     PartitioningType partitioning;
406     base::FlatHashMap<size_t, ColumnLocator> global_index_to_column_locator;
407   };
408   struct Vtab : public sqlite3_vtab {
409     sqlite::ModuleStateManager<SpanJoinOperatorModule>::PerVtabState* state;
410   };
411 
412   // Base class for a cursor on the span table.
413   struct Cursor final : public sqlite3_vtab_cursor {
CursorSpanJoinOperatorModule::final414     explicit Cursor(State* _state)
415         : t1(_state, &_state->t1_defn),
416           t2(_state, &_state->t2_defn),
417           state(_state) {}
418 
419     bool IsOverlappingSpan() const;
420     base::Status FindOverlappingSpan();
421     Query* FindEarliestFinishQuery();
422 
423     Query t1;
424     Query t2;
425 
426     Query* next_query = nullptr;
427 
428     // Only valid for kMixedPartition.
429     int64_t last_mixed_partition_ = std::numeric_limits<int64_t>::min();
430 
431     State* state;
432   };
433 
434   static constexpr bool kSupportsWrites = false;
435 
436   static int Create(sqlite3*,
437                     void*,
438                     int,
439                     const char* const*,
440                     sqlite3_vtab**,
441                     char**);
442   static int Destroy(sqlite3_vtab*);
443 
444   static int Connect(sqlite3*,
445                      void*,
446                      int,
447                      const char* const*,
448                      sqlite3_vtab**,
449                      char**);
450   static int Disconnect(sqlite3_vtab*);
451 
452   static int BestIndex(sqlite3_vtab*, sqlite3_index_info*);
453 
454   static int Open(sqlite3_vtab*, sqlite3_vtab_cursor**);
455   static int Close(sqlite3_vtab_cursor*);
456 
457   static int Filter(sqlite3_vtab_cursor*,
458                     int,
459                     const char*,
460                     int,
461                     sqlite3_value**);
462   static int Next(sqlite3_vtab_cursor*);
463   static int Eof(sqlite3_vtab_cursor*);
464   static int Column(sqlite3_vtab_cursor*, sqlite3_context*, int);
465   static int Rowid(sqlite3_vtab_cursor*, sqlite_int64*);
466 
467   static int FindFunction(sqlite3_vtab*,
468                           int,
469                           const char*,
470                           FindFunctionFn**,
471                           void**);
472 
473   // This needs to happen at the end as it depends on the functions
474   // defined above.
475   static constexpr sqlite3_module kModule = CreateModule();
476 };
477 
478 }  // namespace perfetto::trace_processor
479 
480 #endif  // SRC_TRACE_PROCESSOR_PERFETTO_SQL_INTRINSICS_OPERATORS_SPAN_JOIN_OPERATOR_H_
481