xref: /aosp_15_r20/external/perfetto/src/trace_processor/db/table.cc (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "src/trace_processor/db/table.h"
18 
19 #include <algorithm>
20 #include <cstdint>
21 #include <memory>
22 #include <optional>
23 #include <string>
24 #include <utility>
25 #include <vector>
26 
27 #include "perfetto/base/logging.h"
28 #include "perfetto/base/status.h"
29 #include "perfetto/public/compiler.h"
30 #include "perfetto/trace_processor/basic_types.h"
31 #include "perfetto/trace_processor/ref_counted.h"
32 #include "src/trace_processor/containers/bit_vector.h"
33 #include "src/trace_processor/containers/row_map.h"
34 #include "src/trace_processor/containers/string_pool.h"
35 #include "src/trace_processor/db/column.h"
36 #include "src/trace_processor/db/column/arrangement_overlay.h"
37 #include "src/trace_processor/db/column/data_layer.h"
38 #include "src/trace_processor/db/column/overlay_layer.h"
39 #include "src/trace_processor/db/column/range_overlay.h"
40 #include "src/trace_processor/db/column/selector_overlay.h"
41 #include "src/trace_processor/db/column/storage_layer.h"
42 #include "src/trace_processor/db/column/types.h"
43 #include "src/trace_processor/db/column_storage_overlay.h"
44 #include "src/trace_processor/db/query_executor.h"
45 
46 namespace perfetto::trace_processor {
47 
48 namespace {
49 using Indices = column::DataLayerChain::Indices;
50 
51 constexpr uint32_t kIndexVectorThreshold = 1024;
52 
53 // Returns if |op| is an operation that can use the fact that the data is
54 // sorted.
IsSortingOp(FilterOp op)55 bool IsSortingOp(FilterOp op) {
56   switch (op) {
57     case FilterOp::kEq:
58     case FilterOp::kLe:
59     case FilterOp::kLt:
60     case FilterOp::kGe:
61     case FilterOp::kGt:
62     case FilterOp::kIsNotNull:
63     case FilterOp::kIsNull:
64       return true;
65     case FilterOp::kGlob:
66     case FilterOp::kRegex:
67     case FilterOp::kNe:
68       return false;
69   }
70   PERFETTO_FATAL("For GCC");
71 }
72 
ApplyMinMaxQuery(RowMap & rm,Order o,const column::DataLayerChain & chain)73 void ApplyMinMaxQuery(RowMap& rm,
74                       Order o,
75                       const column::DataLayerChain& chain) {
76   std::vector<uint32_t> table_indices = std::move(rm).TakeAsIndexVector();
77   auto indices = Indices::Create(table_indices, Indices::State::kMonotonic);
78   std::optional<Token> ret_tok =
79       o.desc ? chain.MaxElement(indices) : chain.MinElement(indices);
80   rm = ret_tok.has_value() ? RowMap(std::vector<uint32_t>{ret_tok->payload})
81                            : RowMap();
82 }
83 
ApplyLimitAndOffset(RowMap & rm,const Query & q)84 void ApplyLimitAndOffset(RowMap& rm, const Query& q) {
85   uint32_t end = rm.size();
86   uint32_t start = std::min(q.offset, end);
87   if (q.limit) {
88     end = std::min(end, *q.limit + q.offset);
89   }
90   rm = rm.SelectRows(RowMap(start, end));
91 }
92 
93 }  // namespace
94 
Table(StringPool * pool,uint32_t row_count,std::vector<ColumnLegacy> columns,std::vector<ColumnStorageOverlay> overlays)95 Table::Table(StringPool* pool,
96              uint32_t row_count,
97              std::vector<ColumnLegacy> columns,
98              std::vector<ColumnStorageOverlay> overlays)
99     : string_pool_(pool),
100       row_count_(row_count),
101       overlays_(std::move(overlays)),
102       columns_(std::move(columns)) {
103   PERFETTO_DCHECK(string_pool_);
104 }
105 
106 Table::~Table() = default;
107 
operator =(Table && other)108 Table& Table::operator=(Table&& other) noexcept {
109   row_count_ = other.row_count_;
110   string_pool_ = other.string_pool_;
111 
112   overlays_ = std::move(other.overlays_);
113   columns_ = std::move(other.columns_);
114   indexes_ = std::move(other.indexes_);
115 
116   storage_layers_ = std::move(other.storage_layers_);
117   null_layers_ = std::move(other.null_layers_);
118   overlay_layers_ = std::move(other.overlay_layers_);
119   chains_ = std::move(other.chains_);
120 
121   for (ColumnLegacy& col : columns_) {
122     col.table_ = this;
123   }
124   return *this;
125 }
126 
Copy() const127 Table Table::Copy() const {
128   Table table = CopyExceptOverlays();
129   for (const ColumnStorageOverlay& overlay : overlays_) {
130     table.overlays_.emplace_back(overlay.Copy());
131   }
132   table.OnConstructionCompleted(storage_layers_, null_layers_, overlay_layers_);
133   return table;
134 }
135 
CopyExceptOverlays() const136 Table Table::CopyExceptOverlays() const {
137   std::vector<ColumnLegacy> cols;
138   cols.reserve(columns_.size());
139   for (const ColumnLegacy& col : columns_) {
140     cols.emplace_back(col, col.index_in_table(), col.overlay_index());
141   }
142   return {string_pool_, row_count_, std::move(cols), {}};
143 }
144 
QueryToRowMap(const Query & q) const145 RowMap Table::QueryToRowMap(const Query& q) const {
146   // We need to delay creation of the chains to this point because of Chrome
147   // does not want the binary size overhead of including the chain
148   // implementations. As they also don't query tables (instead just iterating)
149   // over them), using a combination of dead code elimination and linker
150   // stripping all chain related code be removed.
151   //
152   // From rough benchmarking, this has a negligible impact on peformance as this
153   // branch is almost never taken.
154   if (PERFETTO_UNLIKELY(chains_.size() != columns_.size())) {
155     CreateChains();
156   }
157 
158   // Fast path for joining on id.
159   const auto& cs = q.constraints;
160   RowMap rm;
161   uint32_t cs_offset = 0;
162   if (!cs.empty() && cs.front().op == FilterOp::kEq &&
163       cs.front().value.type == SqlValue::kLong &&
164       columns_[cs.front().col_idx].IsId() &&
165       !HasNullOrOverlayLayer(cs.front().col_idx)) {
166     rm = ApplyIdJoinConstraints(cs, cs_offset);
167   } else {
168     rm = TryApplyIndex(cs, cs_offset);
169   }
170 
171   // Filter on constraints that are not using index.
172   for (; cs_offset < cs.size(); cs_offset++) {
173     const Constraint& c = cs[cs_offset];
174     QueryExecutor::ApplyConstraint(c, ChainForColumn(c.col_idx), &rm);
175   }
176 
177   if (q.order_type != Query::OrderType::kSort) {
178     ApplyDistinct(q, &rm);
179   }
180 
181   // Fastpath for one sort, no distinct and limit 1. This type of query means we
182   // need to run Max/Min on orderby column and there is no need for sorting.
183   if (q.IsMinMaxQuery()) {
184     ApplyMinMaxQuery(rm, q.orders.front(),
185                      ChainForColumn(q.orders.front().col_idx));
186     return rm;
187   }
188 
189   if (q.RequireSort()) {
190     ApplySort(q, &rm);
191   }
192 
193   if (q.limit.has_value() || q.offset != 0) {
194     ApplyLimitAndOffset(rm, q);
195   }
196 
197   return rm;
198 }
199 
Sort(const std::vector<Order> & ob) const200 Table Table::Sort(const std::vector<Order>& ob) const {
201   if (ob.empty()) {
202     return Copy();
203   }
204 
205   // Return a copy of this table with the RowMaps using the computed ordered
206   // RowMap.
207   Table table = CopyExceptOverlays();
208   Query q;
209   q.orders = ob;
210   RowMap rm = QueryToRowMap(q);
211   for (const ColumnStorageOverlay& overlay : overlays_) {
212     table.overlays_.emplace_back(overlay.SelectRows(rm));
213     PERFETTO_DCHECK(table.overlays_.back().size() == table.row_count());
214   }
215 
216   // Remove the sorted and row set flags from all the columns.
217   for (auto& col : table.columns_) {
218     col.flags_ &= ~ColumnLegacy::Flag::kSorted;
219     col.flags_ &= ~ColumnLegacy::Flag::kSetId;
220   }
221 
222   // For the first order by, make the column flag itself as sorted but
223   // only if the sort was in ascending order.
224   if (!ob.front().desc) {
225     table.columns_[ob.front().col_idx].flags_ |= ColumnLegacy::Flag::kSorted;
226   }
227 
228   std::vector<RefPtr<column::OverlayLayer>> overlay_layers(
229       table.overlays_.size());
230   for (uint32_t i = 0; i < table.overlays_.size(); ++i) {
231     if (table.overlays_[i].row_map().IsIndexVector()) {
232       overlay_layers[i].reset(new column::ArrangementOverlay(
233           table.overlays_[i].row_map().GetIfIndexVector(),
234           column::DataLayerChain::Indices::State::kNonmonotonic));
235     } else if (table.overlays_[i].row_map().IsBitVector()) {
236       overlay_layers[i].reset(new column::SelectorOverlay(
237           table.overlays_[i].row_map().GetIfBitVector()));
238     } else if (table.overlays_[i].row_map().IsRange()) {
239       overlay_layers[i].reset(
240           new column::RangeOverlay(table.overlays_[i].row_map().GetIfIRange()));
241     }
242   }
243   table.OnConstructionCompleted(storage_layers_, null_layers_,
244                                 std::move(overlay_layers));
245   return table;
246 }
247 
OnConstructionCompleted(std::vector<RefPtr<column::StorageLayer>> storage_layers,std::vector<RefPtr<column::OverlayLayer>> null_layers,std::vector<RefPtr<column::OverlayLayer>> overlay_layers)248 void Table::OnConstructionCompleted(
249     std::vector<RefPtr<column::StorageLayer>> storage_layers,
250     std::vector<RefPtr<column::OverlayLayer>> null_layers,
251     std::vector<RefPtr<column::OverlayLayer>> overlay_layers) {
252   for (ColumnLegacy& col : columns_) {
253     col.BindToTable(this, string_pool_);
254   }
255   PERFETTO_CHECK(storage_layers.size() == columns_.size());
256   PERFETTO_CHECK(null_layers.size() == columns_.size());
257   PERFETTO_CHECK(overlay_layers.size() == overlays_.size());
258   storage_layers_ = std::move(storage_layers);
259   null_layers_ = std::move(null_layers);
260   overlay_layers_ = std::move(overlay_layers);
261 }
262 
HasNullOrOverlayLayer(uint32_t col_idx) const263 bool Table::HasNullOrOverlayLayer(uint32_t col_idx) const {
264   if (null_layers_[col_idx].get()) {
265     return true;
266   }
267   const auto& oly_idx = columns_[col_idx].overlay_index();
268   const auto& overlay = overlay_layers_[oly_idx];
269   return overlay.get() != nullptr;
270 }
271 
CreateChains() const272 void Table::CreateChains() const {
273   chains_.resize(columns_.size());
274   for (uint32_t i = 0; i < columns_.size(); ++i) {
275     chains_[i] = storage_layers_[i]->MakeChain();
276     if (const auto& null_overlay = null_layers_[i]; null_overlay.get()) {
277       chains_[i] = null_overlay->MakeChain(std::move(chains_[i]));
278     }
279     const auto& oly_idx = columns_[i].overlay_index();
280     if (const auto& overlay = overlay_layers_[oly_idx]; overlay.get()) {
281       chains_[i] = overlay->MakeChain(
282           std::move(chains_[i]),
283           column::DataLayer::ChainCreationArgs{columns_[i].IsSorted()});
284     }
285   }
286 }
287 
CreateIndex(const std::string & name,std::vector<uint32_t> col_idxs,bool replace)288 base::Status Table::CreateIndex(const std::string& name,
289                                 std::vector<uint32_t> col_idxs,
290                                 bool replace) {
291   Query q;
292   for (const auto& c : col_idxs) {
293     q.orders.push_back({c});
294   }
295   std::vector<uint32_t> index = QueryToRowMap(q).TakeAsIndexVector();
296 
297   auto it = std::find_if(
298       indexes_.begin(), indexes_.end(),
299       [&name](const ColumnIndex& idx) { return idx.name == name; });
300   if (it == indexes_.end()) {
301     indexes_.push_back({name, std::move(col_idxs), std::move(index)});
302     return base::OkStatus();
303   }
304   if (replace) {
305     it->columns = std::move(col_idxs);
306     it->index = std::move(index);
307     return base::OkStatus();
308   }
309   return base::ErrStatus("Index of this name already exists on this table.");
310 }
311 
DropIndex(const std::string & name)312 base::Status Table::DropIndex(const std::string& name) {
313   auto it = std::find_if(
314       indexes_.begin(), indexes_.end(),
315       [&name](const ColumnIndex& idx) { return idx.name == name; });
316   if (it == indexes_.end()) {
317     return base::ErrStatus("Index '%s' not found.", name.c_str());
318   }
319   indexes_.erase(it);
320   return base::OkStatus();
321 }
322 
ApplyDistinct(const Query & q,RowMap * rm) const323 void Table::ApplyDistinct(const Query& q, RowMap* rm) const {
324   const auto& ob = q.orders;
325   PERFETTO_DCHECK(!ob.empty());
326 
327   // `q.orders` should be treated here only as information on what should we
328   // run distinct on, they should not be used for subsequent sorting.
329   // TODO(mayzner): Remove the check after we implement the multi column
330   // distinct.
331   PERFETTO_DCHECK(ob.size() == 1);
332 
333   std::vector<uint32_t> table_indices = std::move(*rm).TakeAsIndexVector();
334   auto indices = Indices::Create(table_indices, Indices::State::kMonotonic);
335   ChainForColumn(ob.front().col_idx).Distinct(indices);
336   PERFETTO_DCHECK(indices.tokens.size() <= table_indices.size());
337 
338   for (uint32_t i = 0; i < indices.tokens.size(); ++i) {
339     table_indices[i] = indices.tokens[i].payload;
340   }
341   table_indices.resize(indices.tokens.size());
342 
343   // Sorting that happens later might require indices to preserve ordering.
344   // TODO(mayzner): Needs to be changed after implementing multi column
345   // distinct.
346   if (q.order_type == Query::OrderType::kDistinctAndSort) {
347     std::sort(table_indices.begin(), table_indices.end());
348   }
349 
350   *rm = RowMap(std::move(table_indices));
351 }
352 
ApplySort(const Query & q,RowMap * rm) const353 void Table::ApplySort(const Query& q, RowMap* rm) const {
354   const auto& ob = q.orders;
355   // Return the RowMap directly if there is a single constraint to sort the
356   // table by a column which is already sorted.
357   const auto& first_col = columns_[ob.front().col_idx];
358   if (ob.size() == 1 && first_col.IsSorted() && !ob.front().desc)
359     return;
360 
361   // Build an index vector with all the indices for the first |size_| rows.
362   std::vector<uint32_t> idx = std::move(*rm).TakeAsIndexVector();
363   if (ob.size() == 1 && first_col.IsSorted()) {
364     // We special case a single constraint in descending order as this
365     // happens any time the |max| function is used in SQLite. We can be
366     // more efficient as this column is already sorted so we simply need
367     // to reverse the order of this column.
368     PERFETTO_DCHECK(ob.front().desc);
369     std::reverse(idx.begin(), idx.end());
370   } else {
371     QueryExecutor::SortLegacy(this, ob, idx);
372   }
373 
374   *rm = RowMap(std::move(idx));
375 }
376 
TryApplyIndex(const std::vector<Constraint> & c_vec,uint32_t & cs_offset) const377 RowMap Table::TryApplyIndex(const std::vector<Constraint>& c_vec,
378                             uint32_t& cs_offset) const {
379   RowMap rm(0, row_count());
380 
381   // Prework - use indexes if possible and decide which one.
382   std::vector<uint32_t> maybe_idx_cols;
383   for (const auto& c : c_vec) {
384     // Id columns shouldn't use index.
385     if (columns()[c.col_idx].IsId()) {
386       break;
387     }
388     // The operation has to support sorting.
389     if (!IsSortingOp(c.op)) {
390       break;
391     }
392     maybe_idx_cols.push_back(c.col_idx);
393 
394     // For the next col to be able to use index, all previous constraints have
395     // to be equality.
396     if (c.op != FilterOp::kEq) {
397       break;
398     }
399   }
400 
401   OrderedIndices o_idxs;
402   while (!maybe_idx_cols.empty()) {
403     if (auto maybe_idx = GetIndex(maybe_idx_cols)) {
404       o_idxs = *maybe_idx;
405       break;
406     }
407     maybe_idx_cols.pop_back();
408   }
409 
410   // If we can't use the index just apply constraints in a standard way.
411   if (maybe_idx_cols.empty()) {
412     return rm;
413   }
414 
415   for (uint32_t i = 0; i < maybe_idx_cols.size(); i++) {
416     const Constraint& c = c_vec[i];
417     Range r =
418         ChainForColumn(c.col_idx).OrderedIndexSearch(c.op, c.value, o_idxs);
419     o_idxs.data += r.start;
420     o_idxs.size = r.size();
421   }
422   cs_offset = static_cast<uint32_t>(maybe_idx_cols.size());
423 
424   std::vector<uint32_t> res_vec(o_idxs.data, o_idxs.data + o_idxs.size);
425   if (res_vec.size() < kIndexVectorThreshold) {
426     std::sort(res_vec.begin(), res_vec.end());
427     return RowMap(std::move(res_vec));
428   }
429   return RowMap(BitVector::FromUnsortedIndexVector(res_vec));
430 }
431 
ApplyIdJoinConstraints(const std::vector<Constraint> & cs,uint32_t & cs_offset) const432 RowMap Table::ApplyIdJoinConstraints(const std::vector<Constraint>& cs,
433                                      uint32_t& cs_offset) const {
434   uint32_t i = 1;
435   uint32_t row = static_cast<uint32_t>(cs.front().value.AsLong());
436   if (row >= row_count()) {
437     return RowMap();
438   }
439   for (; i < cs.size(); i++) {
440     const Constraint& c = cs[i];
441     switch (ChainForColumn(c.col_idx).SingleSearch(c.op, c.value, row)) {
442       case SingleSearchResult::kNoMatch:
443         return RowMap();
444       case SingleSearchResult::kMatch:
445         continue;
446       case SingleSearchResult::kNeedsFullSearch:
447         cs_offset = i;
448         return RowMap(row, row + 1);
449     }
450   }
451   cs_offset = static_cast<uint32_t>(cs.size());
452   return RowMap(row, row + 1);
453 }
454 
455 }  // namespace perfetto::trace_processor
456