1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_TRACE_PROCESSOR_PERFETTO_SQL_INTRINSICS_OPERATORS_SPAN_JOIN_OPERATOR_H_ 18 #define SRC_TRACE_PROCESSOR_PERFETTO_SQL_INTRINSICS_OPERATORS_SPAN_JOIN_OPERATOR_H_ 19 20 #include <cstddef> 21 #include <cstdint> 22 #include <limits> 23 #include <optional> 24 #include <string> 25 #include <utility> 26 #include <vector> 27 28 #include "perfetto/base/logging.h" 29 #include "perfetto/base/status.h" 30 #include "perfetto/ext/base/flat_hash_map.h" 31 #include "perfetto/ext/base/string_splitter.h" 32 #include "perfetto/ext/base/string_utils.h" 33 #include "perfetto/trace_processor/basic_types.h" 34 #include "src/trace_processor/sqlite/bindings/sqlite_module.h" 35 #include "src/trace_processor/sqlite/module_lifecycle_manager.h" 36 #include "src/trace_processor/sqlite/sqlite_engine.h" 37 38 namespace perfetto::trace_processor { 39 40 class PerfettoSqlEngine; 41 struct SpanJoinOperatorModule; 42 43 // Implements the SPAN JOIN operation between two tables on a particular column. 44 // 45 // Span: 46 // A span is a row with a timestamp and a duration. It is used to model 47 // operations which run for a particular *span* of time. 48 // 49 // We draw spans like so (time on the x-axis): 50 // start of span->[ time where opertion is running ]<- end of span 51 // 52 // Multiple spans can happen in parallel: 53 // [ ] 54 // [ ] 55 // [ ] 56 // [ ] 57 // 58 // The above for example, models scheduling activity on a 4-core computer for a 59 // short period of time. 60 // 61 // Span join: 62 // The span join operation can be thought of as the intersection of span tables. 63 // That is, the join table has a span for each pair of spans in the child tables 64 // where the spans overlap. Because many spans are possible in parallel, an 65 // extra metadata column (labelled the "join column") is used to distinguish 66 // between the spanned tables. 67 // 68 // For a given join key suppose these were the two span tables: 69 // Table 1: [ ] [ ] [ ] 70 // Table 2: [ ] [ ] [ ] 71 // Output : [ ] [ ] [] 72 // 73 // All other columns apart from timestamp (ts), duration (dur) and the join key 74 // are passed through unchanged. 75 struct SpanJoinOperatorModule : public sqlite::Module<SpanJoinOperatorModule> { 76 public: 77 static constexpr uint32_t kSourceGeqOpCode = 78 SQLITE_INDEX_CONSTRAINT_FUNCTION + 1; 79 80 struct State; 81 82 // Enum indicating whether the queries on the two inner tables should 83 // emit shadows. 84 enum class EmitShadowType { 85 // Used when the table should emit all shadow slices (both present and 86 // missing partition shadows). 87 kAll, 88 89 // Used when the table should only emit shadows for partitions which are 90 // present. 91 kPresentPartitionOnly, 92 93 // Used when the table should emit no shadow slices. 94 kNone, 95 }; 96 97 // Parsed version of a table descriptor. 98 struct TableDescriptor { 99 static base::Status Parse(const std::string& raw_descriptor, 100 TableDescriptor* descriptor); 101 IsPartitionedSpanJoinOperatorModule::TableDescriptor102 bool IsPartitioned() const { return !partition_col.empty(); } 103 104 std::string name; 105 std::string partition_col; 106 }; 107 108 // Contains the definition of the child tables. 109 class TableDefinition { 110 public: 111 TableDefinition() = default; 112 113 TableDefinition(std::string name, 114 std::string partition_col, 115 std::vector<std::pair<SqlValue::Type, std::string>> cols, 116 EmitShadowType emit_shadow_type, 117 uint32_t ts_idx, 118 uint32_t dur_idx, 119 uint32_t partition_idx); 120 121 static base::Status Create(PerfettoSqlEngine* engine, 122 const TableDescriptor& desc, 123 EmitShadowType emit_shadow_type, 124 TableDefinition* defn); 125 126 // Creates an SQL query from the constraints and index, 127 std::string CreateSqlQuery(base::StringSplitter&, 128 sqlite3_value** argv) const; 129 130 // Creates the section of the "CREATE TABLE" corresponding to this 131 // definition. 132 std::string CreateVtabCreateTableSection() const; 133 134 // Returns whether this table should emit present partition shadow slices. ShouldEmitPresentPartitionShadowSpanJoinOperatorModule135 bool ShouldEmitPresentPartitionShadow() const { 136 return emit_shadow_type_ == EmitShadowType::kAll || 137 emit_shadow_type_ == EmitShadowType::kPresentPartitionOnly; 138 } 139 140 // Returns whether this table should emit missing partition shadow slices. ShouldEmitMissingPartitionShadowSpanJoinOperatorModule141 bool ShouldEmitMissingPartitionShadow() const { 142 return emit_shadow_type_ == EmitShadowType::kAll; 143 } 144 145 // Returns whether the table is partitioned. IsPartitionedSpanJoinOperatorModule146 bool IsPartitioned() const { return !partition_col_.empty(); } 147 nameSpanJoinOperatorModule148 const std::string& name() const { return name_; } partition_colSpanJoinOperatorModule149 const std::string& partition_col() const { return partition_col_; } columnsSpanJoinOperatorModule150 const std::vector<std::pair<SqlValue::Type, std::string>>& columns() const { 151 return cols_; 152 } 153 ts_idxSpanJoinOperatorModule154 uint32_t ts_idx() const { return ts_idx_; } dur_idxSpanJoinOperatorModule155 uint32_t dur_idx() const { return dur_idx_; } partition_idxSpanJoinOperatorModule156 uint32_t partition_idx() const { return partition_idx_; } 157 158 private: 159 EmitShadowType emit_shadow_type_ = EmitShadowType::kNone; 160 161 std::string name_; 162 std::string partition_col_; 163 std::vector<std::pair<SqlValue::Type, std::string>> cols_; 164 165 uint32_t ts_idx_ = std::numeric_limits<uint32_t>::max(); 166 uint32_t dur_idx_ = std::numeric_limits<uint32_t>::max(); 167 uint32_t partition_idx_ = std::numeric_limits<uint32_t>::max(); 168 }; 169 170 // Stores information about a single subquery into one of the two child 171 // tables. 172 // 173 // This class is implemented as a state machine which steps from one slice to 174 // the next. 175 class Query { 176 public: 177 // Enum encoding the current state of the query in the state machine. 178 enum class State { 179 // Encodes that the current slice is a real slice (i.e. comes directly 180 // from the cursor). 181 kReal, 182 183 // Encodes that the current slice is on a partition for which there is a 184 // real slice present. 185 kPresentPartitionShadow, 186 187 // Encodes that the current slice is on a paritition(s) for which there is 188 // no real slice for those partition(s). 189 kMissingPartitionShadow, 190 191 // Encodes that this query has reached the end. 192 kEof, 193 }; 194 195 Query(SpanJoinOperatorModule::State*, const TableDefinition*); 196 virtual ~Query(); 197 198 Query(Query&&) noexcept = default; 199 Query& operator=(Query&&) = default; 200 201 enum class InitialEofBehavior { 202 kTreatAsEof, 203 kTreatAsMissingPartitionShadow 204 }; 205 206 // Initializes the query with the given constraints and query parameters. 207 base::Status Initialize( 208 std::string sql, 209 InitialEofBehavior eof_behavior = InitialEofBehavior::kTreatAsEof); 210 211 // Forwards the query to the next valid slice. 212 base::Status Next(); 213 214 // Rewinds the query to the first valid slice 215 // This is used in the mixed partitioning case where the query with no 216 // partitions is rewound to the start on every new partition. 217 base::Status Rewind(); 218 219 // Reports the column at the given index to given context. 220 void ReportSqliteResult(sqlite3_context* context, size_t index); 221 222 // Returns whether the cursor has reached eof. IsEofSpanJoinOperatorModule223 bool IsEof() const { return state_ == State::kEof; } 224 225 // Returns whether the current slice pointed to is a real slice. IsRealSpanJoinOperatorModule226 bool IsReal() const { return state_ == State::kReal; } 227 228 // Returns the first partition this slice covers (for real/single partition 229 // shadows, this is the same as partition()). 230 // This partition encodes a [start, end] (closed at start and at end) range 231 // of partitions which works as the partitions are integers. FirstPartitionSpanJoinOperatorModule232 int64_t FirstPartition() const { 233 PERFETTO_DCHECK(!IsEof()); 234 return IsMissingPartitionShadow() ? missing_partition_start_ 235 : partition(); 236 } 237 238 // Returns the last partition this slice covers (for real/single partition 239 // shadows, this is the same as partition()). 240 // This partition encodes a [start, end] (closed at start and at end) range 241 // of partitions which works as the partitions are integers. LastPartitionSpanJoinOperatorModule242 int64_t LastPartition() const { 243 PERFETTO_DCHECK(!IsEof()); 244 return IsMissingPartitionShadow() ? missing_partition_end_ - 1 245 : partition(); 246 } 247 248 // Returns the end timestamp of this slice adjusted to ensure that -1 249 // duration slices always returns ts. AdjustedTsEndSpanJoinOperatorModule250 int64_t AdjustedTsEnd() const { 251 PERFETTO_DCHECK(!IsEof()); 252 return ts_end_ - ts() == -1 ? ts() : ts_end_; 253 } 254 tsSpanJoinOperatorModule255 int64_t ts() const { 256 PERFETTO_DCHECK(!IsEof()); 257 return ts_; 258 } partitionSpanJoinOperatorModule259 int64_t partition() const { 260 PERFETTO_DCHECK(!IsEof() && defn_->IsPartitioned()); 261 return partition_; 262 } 263 raw_ts_endSpanJoinOperatorModule264 int64_t raw_ts_end() const { 265 PERFETTO_DCHECK(!IsEof()); 266 return ts_end_; 267 } 268 definitionSpanJoinOperatorModule269 const TableDefinition* definition() const { return defn_; } 270 271 private: 272 Query(Query&) = delete; 273 Query& operator=(const Query&) = delete; 274 275 // Returns whether the current slice pointed to is a valid slice. 276 bool IsValidSlice(); 277 278 // Forwards the query to the next valid slice. 279 base::Status FindNextValidSlice(); 280 281 // Advances the query state machine by one slice. 282 base::Status NextSliceState(); 283 284 // Forwards the cursor to point to the next real slice. 285 base::Status CursorNext(); 286 287 // Returns whether the current slice pointed to is a present partition 288 // shadow. IsPresentPartitionShadowSpanJoinOperatorModule289 bool IsPresentPartitionShadow() const { 290 return state_ == State::kPresentPartitionShadow; 291 } 292 293 // Returns whether the current slice pointed to is a missing partition 294 // shadow. IsMissingPartitionShadowSpanJoinOperatorModule295 bool IsMissingPartitionShadow() const { 296 return state_ == State::kMissingPartitionShadow; 297 } 298 299 // Returns whether the current slice pointed to is an empty shadow. IsEmptyShadowSpanJoinOperatorModule300 bool IsEmptyShadow() const { 301 PERFETTO_DCHECK(!IsEof()); 302 return (!IsReal() && ts_ == ts_end_) || 303 (IsMissingPartitionShadow() && 304 missing_partition_start_ == missing_partition_end_); 305 } 306 CursorTsSpanJoinOperatorModule307 int64_t CursorTs() const { 308 PERFETTO_DCHECK(!cursor_eof_); 309 auto ts_idx = static_cast<int>(defn_->ts_idx()); 310 return sqlite3_column_int64(stmt_->sqlite_stmt(), ts_idx); 311 } 312 CursorDurSpanJoinOperatorModule313 int64_t CursorDur() const { 314 PERFETTO_DCHECK(!cursor_eof_); 315 auto dur_idx = static_cast<int>(defn_->dur_idx()); 316 return sqlite3_column_int64(stmt_->sqlite_stmt(), dur_idx); 317 } 318 CursorPartitionSpanJoinOperatorModule319 int64_t CursorPartition() const { 320 PERFETTO_DCHECK(!cursor_eof_); 321 PERFETTO_DCHECK(defn_->IsPartitioned()); 322 auto partition_idx = static_cast<int>(defn_->partition_idx()); 323 return sqlite3_column_int64(stmt_->sqlite_stmt(), partition_idx); 324 } 325 326 State state_ = State::kMissingPartitionShadow; 327 bool cursor_eof_ = false; 328 329 // Only valid when |state_| != kEof. 330 int64_t ts_ = 0; 331 int64_t ts_end_ = std::numeric_limits<int64_t>::max(); 332 333 // Only valid when |state_| == kReal or |state_| == kPresentPartitionShadow. 334 int64_t partition_ = std::numeric_limits<int64_t>::min(); 335 336 // Only valid when |state_| == kMissingPartitionShadow. 337 int64_t missing_partition_start_ = 0; 338 int64_t missing_partition_end_ = 0; 339 340 std::string sql_query_; 341 std::optional<SqliteEngine::PreparedStatement> stmt_; 342 343 const TableDefinition* defn_ = nullptr; 344 SpanJoinOperatorModule::State* in_state_ = nullptr; 345 }; 346 347 // Columns of the span operator table. 348 enum Column { 349 kTimestamp = 0, 350 kDuration = 1, 351 kPartition = 2, 352 // All other columns are dynamic depending on the joined tables. 353 }; 354 355 // Enum indicating the possible partitionings of the two tables in span join. 356 enum class PartitioningType { 357 // Used when both tables don't have a partition specified. 358 kNoPartitioning = 0, 359 360 // Used when both tables have the same partition specified. 361 kSamePartitioning = 1, 362 363 // Used when one table has a partition and the other table doesn't. 364 kMixedPartitioning = 2 365 }; 366 367 // Identifier for a column by index in a given table. 368 struct ColumnLocator { 369 const TableDefinition* defn; 370 size_t col_index; 371 }; 372 373 struct Context { ContextSpanJoinOperatorModule::Context374 explicit Context(PerfettoSqlEngine* _engine) : engine(_engine) {} 375 376 PerfettoSqlEngine* engine; 377 sqlite::ModuleStateManager<SpanJoinOperatorModule> manager; 378 }; 379 struct State { IsLeftJoinSpanJoinOperatorModule::State380 bool IsLeftJoin() const { 381 return base::CaseInsensitiveEqual(module_name, "span_left_join"); 382 } IsOuterJoinSpanJoinOperatorModule::State383 bool IsOuterJoin() const { 384 return base::CaseInsensitiveEqual(module_name, "span_outer_join"); 385 } 386 partition_colSpanJoinOperatorModule::State387 const std::string& partition_col() const { 388 return t1_defn.IsPartitioned() ? t1_defn.partition_col() 389 : t2_defn.partition_col(); 390 } 391 392 std::string GetNameForGlobalColumnIndex(const TableDefinition& defn, 393 int global_column); 394 395 std::string BestIndexStrForDefinition(const sqlite3_index_info* info, 396 const TableDefinition& defn); 397 398 void PopulateColumnLocatorMap(uint32_t); 399 400 PerfettoSqlEngine* engine; 401 std::string module_name; 402 std::string create_table_stmt; 403 TableDefinition t1_defn; 404 TableDefinition t2_defn; 405 PartitioningType partitioning; 406 base::FlatHashMap<size_t, ColumnLocator> global_index_to_column_locator; 407 }; 408 struct Vtab : public sqlite3_vtab { 409 sqlite::ModuleStateManager<SpanJoinOperatorModule>::PerVtabState* state; 410 }; 411 412 // Base class for a cursor on the span table. 413 struct Cursor final : public sqlite3_vtab_cursor { CursorSpanJoinOperatorModule::final414 explicit Cursor(State* _state) 415 : t1(_state, &_state->t1_defn), 416 t2(_state, &_state->t2_defn), 417 state(_state) {} 418 419 bool IsOverlappingSpan() const; 420 base::Status FindOverlappingSpan(); 421 Query* FindEarliestFinishQuery(); 422 423 Query t1; 424 Query t2; 425 426 Query* next_query = nullptr; 427 428 // Only valid for kMixedPartition. 429 int64_t last_mixed_partition_ = std::numeric_limits<int64_t>::min(); 430 431 State* state; 432 }; 433 434 static constexpr bool kSupportsWrites = false; 435 436 static int Create(sqlite3*, 437 void*, 438 int, 439 const char* const*, 440 sqlite3_vtab**, 441 char**); 442 static int Destroy(sqlite3_vtab*); 443 444 static int Connect(sqlite3*, 445 void*, 446 int, 447 const char* const*, 448 sqlite3_vtab**, 449 char**); 450 static int Disconnect(sqlite3_vtab*); 451 452 static int BestIndex(sqlite3_vtab*, sqlite3_index_info*); 453 454 static int Open(sqlite3_vtab*, sqlite3_vtab_cursor**); 455 static int Close(sqlite3_vtab_cursor*); 456 457 static int Filter(sqlite3_vtab_cursor*, 458 int, 459 const char*, 460 int, 461 sqlite3_value**); 462 static int Next(sqlite3_vtab_cursor*); 463 static int Eof(sqlite3_vtab_cursor*); 464 static int Column(sqlite3_vtab_cursor*, sqlite3_context*, int); 465 static int Rowid(sqlite3_vtab_cursor*, sqlite_int64*); 466 467 static int FindFunction(sqlite3_vtab*, 468 int, 469 const char*, 470 FindFunctionFn**, 471 void**); 472 473 // This needs to happen at the end as it depends on the functions 474 // defined above. 475 static constexpr sqlite3_module kModule = CreateModule(); 476 }; 477 478 } // namespace perfetto::trace_processor 479 480 #endif // SRC_TRACE_PROCESSOR_PERFETTO_SQL_INTRINSICS_OPERATORS_SPAN_JOIN_OPERATOR_H_ 481