1 /*
2 * Copyright (C) 2019 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "src/trace_processor/db/table.h"
18
19 #include <algorithm>
20 #include <cstdint>
21 #include <memory>
22 #include <optional>
23 #include <string>
24 #include <utility>
25 #include <vector>
26
27 #include "perfetto/base/logging.h"
28 #include "perfetto/base/status.h"
29 #include "perfetto/public/compiler.h"
30 #include "perfetto/trace_processor/basic_types.h"
31 #include "perfetto/trace_processor/ref_counted.h"
32 #include "src/trace_processor/containers/bit_vector.h"
33 #include "src/trace_processor/containers/row_map.h"
34 #include "src/trace_processor/containers/string_pool.h"
35 #include "src/trace_processor/db/column.h"
36 #include "src/trace_processor/db/column/arrangement_overlay.h"
37 #include "src/trace_processor/db/column/data_layer.h"
38 #include "src/trace_processor/db/column/overlay_layer.h"
39 #include "src/trace_processor/db/column/range_overlay.h"
40 #include "src/trace_processor/db/column/selector_overlay.h"
41 #include "src/trace_processor/db/column/storage_layer.h"
42 #include "src/trace_processor/db/column/types.h"
43 #include "src/trace_processor/db/column_storage_overlay.h"
44 #include "src/trace_processor/db/query_executor.h"
45
46 namespace perfetto::trace_processor {
47
48 namespace {
49 using Indices = column::DataLayerChain::Indices;
50
51 constexpr uint32_t kIndexVectorThreshold = 1024;
52
53 // Returns if |op| is an operation that can use the fact that the data is
54 // sorted.
IsSortingOp(FilterOp op)55 bool IsSortingOp(FilterOp op) {
56 switch (op) {
57 case FilterOp::kEq:
58 case FilterOp::kLe:
59 case FilterOp::kLt:
60 case FilterOp::kGe:
61 case FilterOp::kGt:
62 case FilterOp::kIsNotNull:
63 case FilterOp::kIsNull:
64 return true;
65 case FilterOp::kGlob:
66 case FilterOp::kRegex:
67 case FilterOp::kNe:
68 return false;
69 }
70 PERFETTO_FATAL("For GCC");
71 }
72
ApplyMinMaxQuery(RowMap & rm,Order o,const column::DataLayerChain & chain)73 void ApplyMinMaxQuery(RowMap& rm,
74 Order o,
75 const column::DataLayerChain& chain) {
76 std::vector<uint32_t> table_indices = std::move(rm).TakeAsIndexVector();
77 auto indices = Indices::Create(table_indices, Indices::State::kMonotonic);
78 std::optional<Token> ret_tok =
79 o.desc ? chain.MaxElement(indices) : chain.MinElement(indices);
80 rm = ret_tok.has_value() ? RowMap(std::vector<uint32_t>{ret_tok->payload})
81 : RowMap();
82 }
83
ApplyLimitAndOffset(RowMap & rm,const Query & q)84 void ApplyLimitAndOffset(RowMap& rm, const Query& q) {
85 uint32_t end = rm.size();
86 uint32_t start = std::min(q.offset, end);
87 if (q.limit) {
88 end = std::min(end, *q.limit + q.offset);
89 }
90 rm = rm.SelectRows(RowMap(start, end));
91 }
92
93 } // namespace
94
Table(StringPool * pool,uint32_t row_count,std::vector<ColumnLegacy> columns,std::vector<ColumnStorageOverlay> overlays)95 Table::Table(StringPool* pool,
96 uint32_t row_count,
97 std::vector<ColumnLegacy> columns,
98 std::vector<ColumnStorageOverlay> overlays)
99 : string_pool_(pool),
100 row_count_(row_count),
101 overlays_(std::move(overlays)),
102 columns_(std::move(columns)) {
103 PERFETTO_DCHECK(string_pool_);
104 }
105
106 Table::~Table() = default;
107
operator =(Table && other)108 Table& Table::operator=(Table&& other) noexcept {
109 row_count_ = other.row_count_;
110 string_pool_ = other.string_pool_;
111
112 overlays_ = std::move(other.overlays_);
113 columns_ = std::move(other.columns_);
114 indexes_ = std::move(other.indexes_);
115
116 storage_layers_ = std::move(other.storage_layers_);
117 null_layers_ = std::move(other.null_layers_);
118 overlay_layers_ = std::move(other.overlay_layers_);
119 chains_ = std::move(other.chains_);
120
121 for (ColumnLegacy& col : columns_) {
122 col.table_ = this;
123 }
124 return *this;
125 }
126
Copy() const127 Table Table::Copy() const {
128 Table table = CopyExceptOverlays();
129 for (const ColumnStorageOverlay& overlay : overlays_) {
130 table.overlays_.emplace_back(overlay.Copy());
131 }
132 table.OnConstructionCompleted(storage_layers_, null_layers_, overlay_layers_);
133 return table;
134 }
135
CopyExceptOverlays() const136 Table Table::CopyExceptOverlays() const {
137 std::vector<ColumnLegacy> cols;
138 cols.reserve(columns_.size());
139 for (const ColumnLegacy& col : columns_) {
140 cols.emplace_back(col, col.index_in_table(), col.overlay_index());
141 }
142 return {string_pool_, row_count_, std::move(cols), {}};
143 }
144
QueryToRowMap(const Query & q) const145 RowMap Table::QueryToRowMap(const Query& q) const {
146 // We need to delay creation of the chains to this point because of Chrome
147 // does not want the binary size overhead of including the chain
148 // implementations. As they also don't query tables (instead just iterating)
149 // over them), using a combination of dead code elimination and linker
150 // stripping all chain related code be removed.
151 //
152 // From rough benchmarking, this has a negligible impact on peformance as this
153 // branch is almost never taken.
154 if (PERFETTO_UNLIKELY(chains_.size() != columns_.size())) {
155 CreateChains();
156 }
157
158 // Fast path for joining on id.
159 const auto& cs = q.constraints;
160 RowMap rm;
161 uint32_t cs_offset = 0;
162 if (!cs.empty() && cs.front().op == FilterOp::kEq &&
163 cs.front().value.type == SqlValue::kLong &&
164 columns_[cs.front().col_idx].IsId() &&
165 !HasNullOrOverlayLayer(cs.front().col_idx)) {
166 rm = ApplyIdJoinConstraints(cs, cs_offset);
167 } else {
168 rm = TryApplyIndex(cs, cs_offset);
169 }
170
171 // Filter on constraints that are not using index.
172 for (; cs_offset < cs.size(); cs_offset++) {
173 const Constraint& c = cs[cs_offset];
174 QueryExecutor::ApplyConstraint(c, ChainForColumn(c.col_idx), &rm);
175 }
176
177 if (q.order_type != Query::OrderType::kSort) {
178 ApplyDistinct(q, &rm);
179 }
180
181 // Fastpath for one sort, no distinct and limit 1. This type of query means we
182 // need to run Max/Min on orderby column and there is no need for sorting.
183 if (q.IsMinMaxQuery()) {
184 ApplyMinMaxQuery(rm, q.orders.front(),
185 ChainForColumn(q.orders.front().col_idx));
186 return rm;
187 }
188
189 if (q.RequireSort()) {
190 ApplySort(q, &rm);
191 }
192
193 if (q.limit.has_value() || q.offset != 0) {
194 ApplyLimitAndOffset(rm, q);
195 }
196
197 return rm;
198 }
199
Sort(const std::vector<Order> & ob) const200 Table Table::Sort(const std::vector<Order>& ob) const {
201 if (ob.empty()) {
202 return Copy();
203 }
204
205 // Return a copy of this table with the RowMaps using the computed ordered
206 // RowMap.
207 Table table = CopyExceptOverlays();
208 Query q;
209 q.orders = ob;
210 RowMap rm = QueryToRowMap(q);
211 for (const ColumnStorageOverlay& overlay : overlays_) {
212 table.overlays_.emplace_back(overlay.SelectRows(rm));
213 PERFETTO_DCHECK(table.overlays_.back().size() == table.row_count());
214 }
215
216 // Remove the sorted and row set flags from all the columns.
217 for (auto& col : table.columns_) {
218 col.flags_ &= ~ColumnLegacy::Flag::kSorted;
219 col.flags_ &= ~ColumnLegacy::Flag::kSetId;
220 }
221
222 // For the first order by, make the column flag itself as sorted but
223 // only if the sort was in ascending order.
224 if (!ob.front().desc) {
225 table.columns_[ob.front().col_idx].flags_ |= ColumnLegacy::Flag::kSorted;
226 }
227
228 std::vector<RefPtr<column::OverlayLayer>> overlay_layers(
229 table.overlays_.size());
230 for (uint32_t i = 0; i < table.overlays_.size(); ++i) {
231 if (table.overlays_[i].row_map().IsIndexVector()) {
232 overlay_layers[i].reset(new column::ArrangementOverlay(
233 table.overlays_[i].row_map().GetIfIndexVector(),
234 column::DataLayerChain::Indices::State::kNonmonotonic));
235 } else if (table.overlays_[i].row_map().IsBitVector()) {
236 overlay_layers[i].reset(new column::SelectorOverlay(
237 table.overlays_[i].row_map().GetIfBitVector()));
238 } else if (table.overlays_[i].row_map().IsRange()) {
239 overlay_layers[i].reset(
240 new column::RangeOverlay(table.overlays_[i].row_map().GetIfIRange()));
241 }
242 }
243 table.OnConstructionCompleted(storage_layers_, null_layers_,
244 std::move(overlay_layers));
245 return table;
246 }
247
OnConstructionCompleted(std::vector<RefPtr<column::StorageLayer>> storage_layers,std::vector<RefPtr<column::OverlayLayer>> null_layers,std::vector<RefPtr<column::OverlayLayer>> overlay_layers)248 void Table::OnConstructionCompleted(
249 std::vector<RefPtr<column::StorageLayer>> storage_layers,
250 std::vector<RefPtr<column::OverlayLayer>> null_layers,
251 std::vector<RefPtr<column::OverlayLayer>> overlay_layers) {
252 for (ColumnLegacy& col : columns_) {
253 col.BindToTable(this, string_pool_);
254 }
255 PERFETTO_CHECK(storage_layers.size() == columns_.size());
256 PERFETTO_CHECK(null_layers.size() == columns_.size());
257 PERFETTO_CHECK(overlay_layers.size() == overlays_.size());
258 storage_layers_ = std::move(storage_layers);
259 null_layers_ = std::move(null_layers);
260 overlay_layers_ = std::move(overlay_layers);
261 }
262
HasNullOrOverlayLayer(uint32_t col_idx) const263 bool Table::HasNullOrOverlayLayer(uint32_t col_idx) const {
264 if (null_layers_[col_idx].get()) {
265 return true;
266 }
267 const auto& oly_idx = columns_[col_idx].overlay_index();
268 const auto& overlay = overlay_layers_[oly_idx];
269 return overlay.get() != nullptr;
270 }
271
CreateChains() const272 void Table::CreateChains() const {
273 chains_.resize(columns_.size());
274 for (uint32_t i = 0; i < columns_.size(); ++i) {
275 chains_[i] = storage_layers_[i]->MakeChain();
276 if (const auto& null_overlay = null_layers_[i]; null_overlay.get()) {
277 chains_[i] = null_overlay->MakeChain(std::move(chains_[i]));
278 }
279 const auto& oly_idx = columns_[i].overlay_index();
280 if (const auto& overlay = overlay_layers_[oly_idx]; overlay.get()) {
281 chains_[i] = overlay->MakeChain(
282 std::move(chains_[i]),
283 column::DataLayer::ChainCreationArgs{columns_[i].IsSorted()});
284 }
285 }
286 }
287
CreateIndex(const std::string & name,std::vector<uint32_t> col_idxs,bool replace)288 base::Status Table::CreateIndex(const std::string& name,
289 std::vector<uint32_t> col_idxs,
290 bool replace) {
291 Query q;
292 for (const auto& c : col_idxs) {
293 q.orders.push_back({c});
294 }
295 std::vector<uint32_t> index = QueryToRowMap(q).TakeAsIndexVector();
296
297 auto it = std::find_if(
298 indexes_.begin(), indexes_.end(),
299 [&name](const ColumnIndex& idx) { return idx.name == name; });
300 if (it == indexes_.end()) {
301 indexes_.push_back({name, std::move(col_idxs), std::move(index)});
302 return base::OkStatus();
303 }
304 if (replace) {
305 it->columns = std::move(col_idxs);
306 it->index = std::move(index);
307 return base::OkStatus();
308 }
309 return base::ErrStatus("Index of this name already exists on this table.");
310 }
311
DropIndex(const std::string & name)312 base::Status Table::DropIndex(const std::string& name) {
313 auto it = std::find_if(
314 indexes_.begin(), indexes_.end(),
315 [&name](const ColumnIndex& idx) { return idx.name == name; });
316 if (it == indexes_.end()) {
317 return base::ErrStatus("Index '%s' not found.", name.c_str());
318 }
319 indexes_.erase(it);
320 return base::OkStatus();
321 }
322
ApplyDistinct(const Query & q,RowMap * rm) const323 void Table::ApplyDistinct(const Query& q, RowMap* rm) const {
324 const auto& ob = q.orders;
325 PERFETTO_DCHECK(!ob.empty());
326
327 // `q.orders` should be treated here only as information on what should we
328 // run distinct on, they should not be used for subsequent sorting.
329 // TODO(mayzner): Remove the check after we implement the multi column
330 // distinct.
331 PERFETTO_DCHECK(ob.size() == 1);
332
333 std::vector<uint32_t> table_indices = std::move(*rm).TakeAsIndexVector();
334 auto indices = Indices::Create(table_indices, Indices::State::kMonotonic);
335 ChainForColumn(ob.front().col_idx).Distinct(indices);
336 PERFETTO_DCHECK(indices.tokens.size() <= table_indices.size());
337
338 for (uint32_t i = 0; i < indices.tokens.size(); ++i) {
339 table_indices[i] = indices.tokens[i].payload;
340 }
341 table_indices.resize(indices.tokens.size());
342
343 // Sorting that happens later might require indices to preserve ordering.
344 // TODO(mayzner): Needs to be changed after implementing multi column
345 // distinct.
346 if (q.order_type == Query::OrderType::kDistinctAndSort) {
347 std::sort(table_indices.begin(), table_indices.end());
348 }
349
350 *rm = RowMap(std::move(table_indices));
351 }
352
ApplySort(const Query & q,RowMap * rm) const353 void Table::ApplySort(const Query& q, RowMap* rm) const {
354 const auto& ob = q.orders;
355 // Return the RowMap directly if there is a single constraint to sort the
356 // table by a column which is already sorted.
357 const auto& first_col = columns_[ob.front().col_idx];
358 if (ob.size() == 1 && first_col.IsSorted() && !ob.front().desc)
359 return;
360
361 // Build an index vector with all the indices for the first |size_| rows.
362 std::vector<uint32_t> idx = std::move(*rm).TakeAsIndexVector();
363 if (ob.size() == 1 && first_col.IsSorted()) {
364 // We special case a single constraint in descending order as this
365 // happens any time the |max| function is used in SQLite. We can be
366 // more efficient as this column is already sorted so we simply need
367 // to reverse the order of this column.
368 PERFETTO_DCHECK(ob.front().desc);
369 std::reverse(idx.begin(), idx.end());
370 } else {
371 QueryExecutor::SortLegacy(this, ob, idx);
372 }
373
374 *rm = RowMap(std::move(idx));
375 }
376
TryApplyIndex(const std::vector<Constraint> & c_vec,uint32_t & cs_offset) const377 RowMap Table::TryApplyIndex(const std::vector<Constraint>& c_vec,
378 uint32_t& cs_offset) const {
379 RowMap rm(0, row_count());
380
381 // Prework - use indexes if possible and decide which one.
382 std::vector<uint32_t> maybe_idx_cols;
383 for (const auto& c : c_vec) {
384 // Id columns shouldn't use index.
385 if (columns()[c.col_idx].IsId()) {
386 break;
387 }
388 // The operation has to support sorting.
389 if (!IsSortingOp(c.op)) {
390 break;
391 }
392 maybe_idx_cols.push_back(c.col_idx);
393
394 // For the next col to be able to use index, all previous constraints have
395 // to be equality.
396 if (c.op != FilterOp::kEq) {
397 break;
398 }
399 }
400
401 OrderedIndices o_idxs;
402 while (!maybe_idx_cols.empty()) {
403 if (auto maybe_idx = GetIndex(maybe_idx_cols)) {
404 o_idxs = *maybe_idx;
405 break;
406 }
407 maybe_idx_cols.pop_back();
408 }
409
410 // If we can't use the index just apply constraints in a standard way.
411 if (maybe_idx_cols.empty()) {
412 return rm;
413 }
414
415 for (uint32_t i = 0; i < maybe_idx_cols.size(); i++) {
416 const Constraint& c = c_vec[i];
417 Range r =
418 ChainForColumn(c.col_idx).OrderedIndexSearch(c.op, c.value, o_idxs);
419 o_idxs.data += r.start;
420 o_idxs.size = r.size();
421 }
422 cs_offset = static_cast<uint32_t>(maybe_idx_cols.size());
423
424 std::vector<uint32_t> res_vec(o_idxs.data, o_idxs.data + o_idxs.size);
425 if (res_vec.size() < kIndexVectorThreshold) {
426 std::sort(res_vec.begin(), res_vec.end());
427 return RowMap(std::move(res_vec));
428 }
429 return RowMap(BitVector::FromUnsortedIndexVector(res_vec));
430 }
431
ApplyIdJoinConstraints(const std::vector<Constraint> & cs,uint32_t & cs_offset) const432 RowMap Table::ApplyIdJoinConstraints(const std::vector<Constraint>& cs,
433 uint32_t& cs_offset) const {
434 uint32_t i = 1;
435 uint32_t row = static_cast<uint32_t>(cs.front().value.AsLong());
436 if (row >= row_count()) {
437 return RowMap();
438 }
439 for (; i < cs.size(); i++) {
440 const Constraint& c = cs[i];
441 switch (ChainForColumn(c.col_idx).SingleSearch(c.op, c.value, row)) {
442 case SingleSearchResult::kNoMatch:
443 return RowMap();
444 case SingleSearchResult::kMatch:
445 continue;
446 case SingleSearchResult::kNeedsFullSearch:
447 cs_offset = i;
448 return RowMap(row, row + 1);
449 }
450 }
451 cs_offset = static_cast<uint32_t>(cs.size());
452 return RowMap(row, row + 1);
453 }
454
455 } // namespace perfetto::trace_processor
456