xref: /aosp_15_r20/external/perfetto/src/trace_processor/db/column/data_layer.h (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1 /*
2  * Copyright (C) 2023 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_PROCESSOR_DB_COLUMN_DATA_LAYER_H_
18 #define SRC_TRACE_PROCESSOR_DB_COLUMN_DATA_LAYER_H_
19 
20 #include <cstdint>
21 #include <memory>
22 #include <optional>
23 #include <string>
24 #include <utility>
25 #include <vector>
26 
27 #include "perfetto/base/logging.h"
28 #include "perfetto/public/compiler.h"
29 #include "perfetto/trace_processor/basic_types.h"
30 #include "perfetto/trace_processor/ref_counted.h"
31 #include "src/trace_processor/db/column/types.h"
32 
33 namespace perfetto::trace_processor::column {
34 class DataLayerChain;
35 
36 // Data structure which either directly or indirectly (i.e. by transforming
37 // the contents of another DataLayer) provides the data of a column of a
38 // table.
39 class DataLayer : public RefCounted {
40  public:
41   // Arguments for MakeChain on how the inner chain should be interpreted.
42   struct ChainCreationArgs {
43     constexpr explicit ChainCreationArgs(
44         bool _does_layer_order_chain_contents = false)
does_layer_order_chain_contentsChainCreationArgs45         : does_layer_order_chain_contents(_does_layer_order_chain_contents) {}
46 
47     // Indicates whether the current data layer orders the inner chain.
48     // Currently used by ArrangementOverlay to decide whether the arrangement
49     // orders a given chain.
50     bool does_layer_order_chain_contents;
51   };
52   virtual ~DataLayer();
53 
54   // Creates a DataLayerChain for a terminal DataLayer. This means the
55   // DataLayer directly should return the data it contains inside.
56   std::unique_ptr<DataLayerChain> MakeChain();
57 
58   // Creates a DataLayerChain for a non-terminal DataLayer. This means
59   // the DataLayer should transform the contents of the inner chain.
60   std::unique_ptr<DataLayerChain> MakeChain(
61       std::unique_ptr<DataLayerChain>,
62       ChainCreationArgs = ChainCreationArgs());
63 
64  protected:
65   // TODO(b/325583551): remove this when possible.
66   enum class Impl {
67     kArrangement,
68     kDenseNull,
69     kDummy,
70     kId,
71     kNull,
72     kNumericDouble,
73     kNumericUint32,
74     kNumericInt32,
75     kNumericInt64,
76     kRange,
77     kSelector,
78     kSetId,
79     kString,
80   };
DataLayer(Impl impl)81   explicit DataLayer(Impl impl) : impl_(impl) {}
82 
83  private:
84   Impl impl_;
85 };
86 
87 // Corresponds to a series of DataLayer chained together. Provides
88 // functionality for querying the transformed data of the entire chain.
89 class DataLayerChain {
90  public:
91   // Index vector related data required to Filter using IndexSearch.
92   struct Indices {
93     enum class State {
94       // We can't guarantee that data is in monotonic order.
95       kNonmonotonic,
96       // Data is in monotonic order.
97       kMonotonic,
98     };
CreateIndices99     static Indices Create(const std::vector<uint32_t>& raw, State state) {
100       std::vector<Token> tokens;
101       tokens.reserve(raw.size());
102       for (auto r : raw) {
103         tokens.push_back({r, r});
104       }
105       return Indices{std::move(tokens), state};
106     }
CreateWithIndexPayloadForTestingIndices107     static Indices CreateWithIndexPayloadForTesting(
108         const std::vector<uint32_t>& raw,
109         State state) {
110       std::vector<Token> tokens;
111       tokens.reserve(raw.size());
112       for (uint32_t i = 0; i < raw.size(); ++i) {
113         tokens.push_back(Token{raw[i], i});
114       }
115       return Indices{std::move(tokens), state};
116     }
117     std::vector<Token> tokens;
118     State state = State::kNonmonotonic;
119   };
120 
121   // Index vector related data required to Filter using IndexSearch.
122   struct OrderedIndices {
123     const uint32_t* data = nullptr;
124     uint32_t size = 0;
125     Indices::State state = Indices::State::kNonmonotonic;
126   };
127 
128   virtual ~DataLayerChain();
129 
130   // Start of public API.
131 
132   // Checks whether element at the provided index match |op| and |value|.
133   //
134   // Returns true if the element matches, false otherwise.
135   virtual SingleSearchResult SingleSearch(FilterOp op,
136                                           SqlValue value,
137                                           uint32_t row) const = 0;
138 
139   // Searches for elements which match |op| and |value| between |range.start|
140   // and |range.end|.
141   //
142   // Returns either a range or BitVector which indicate the positions in
143   // |range| which match the constraint. If a BitVector is returned, it will
144   // be *precisely* as large as |range.end|.
145   //
146   // Notes for callers:
147   //  * Callers should note that the return value of this function corresponds
148   //    to positions in the storage.
149   //
150   // Notes for implementors:
151   //  * Implementations should ensure that the return value is empty or *only*
152   //    includes positions in |range|. Callers are free to assume this and can
153   //    optimize based on it.
154   //  * Implementations should ensure that, if they return a BitVector, it is
155   //    precisely of size |range.end|.
Search(FilterOp op,SqlValue value,Range range)156   PERFETTO_ALWAYS_INLINE RangeOrBitVector Search(FilterOp op,
157                                                  SqlValue value,
158                                                  Range range) const {
159     PERFETTO_DCHECK(range.end <= size());
160     switch (ValidateSearchConstraints(op, value)) {
161       case SearchValidationResult::kAllData:
162         return RangeOrBitVector(range);
163       case SearchValidationResult::kNoData:
164         return RangeOrBitVector(Range());
165       case SearchValidationResult::kOk:
166         return SearchValidated(op, value, range);
167     }
168     PERFETTO_FATAL("For GCC");
169   }
170 
171   // Searches for elements which match |op| and |value| at the positions given
172   // by |indices| array.
173   //
174   // Returns either a range of BitVector which indicate the positions in
175   // |indices| which match the constraint. If a BitVector is returned, it will
176   // be *precisely* as large as |indices_count|.
177   //
178   // Notes for callers:
179   //  * Callers should note that the return value of this function corresponds
180   //    to positions in |indices| *not* positions in the storage.
181   //
182   // Notes for implementors:
183   //  * Implementations should ensure that, if they return a BitVector, it is
184   //    precisely of size |indices_count|.
IndexSearch(FilterOp op,SqlValue value,Indices & indices)185   PERFETTO_ALWAYS_INLINE void IndexSearch(FilterOp op,
186                                           SqlValue value,
187                                           Indices& indices) const {
188     switch (ValidateSearchConstraints(op, value)) {
189       case SearchValidationResult::kAllData:
190         return;
191       case SearchValidationResult::kNoData:
192         indices.tokens.clear();
193         return;
194       case SearchValidationResult::kOk:
195         IndexSearchValidated(op, value, indices);
196         return;
197     }
198     PERFETTO_FATAL("For GCC");
199   }
200 
201   // Searches for elements which match |op| and |value| at the positions given
202   // by OrderedIndicesdata.
203   //
204   // Returns a Range into OrderedIndicesdata of OrderedIndicesthat pass the
205   // constraint.
206   //
207   // Notes for callers:
208   //  * Should not be called on:
209   //    - kGlob and kRegex as those operations can't use the sorted state
210   //      hence they can't return a Range.
211   //    - kNe as this is inherently unsorted. Use kEq and then reverse the
212   //      result.
213   //  * Callers should note that the return value of this function corresponds
214   //    to positions in |indices| *not* positions in the storage.
215   PERFETTO_ALWAYS_INLINE Range
OrderedIndexSearch(FilterOp op,SqlValue value,const OrderedIndices & indices)216   OrderedIndexSearch(FilterOp op,
217                      SqlValue value,
218                      const OrderedIndices& indices) const {
219     switch (ValidateSearchConstraints(op, value)) {
220       case SearchValidationResult::kAllData:
221         return {0, indices.size};
222       case SearchValidationResult::kNoData:
223         return {};
224       case SearchValidationResult::kOk:
225         return OrderedIndexSearchValidated(op, value, indices);
226     }
227     PERFETTO_FATAL("For GCC");
228   }
229 
230   // Stable sorts an array of Token elements between |start| and |end|
231   // using a comparator defined by looking up the elements in this chain using
232   // the index given by Token::index. |direction| indicates the direction of
233   // the sort (ascending or descending).
234   //
235   // In simple terms the expectation is for implementations do something like:
236   // ```
237   // std::stable_sort(start, index, [](const Token& a, const Token& b) {
238   //  return Get(a.index) < Get(b.index);
239   // });
240   // ```
241   // with |Get| being a function to lookup the element in this chain.
242   virtual void StableSort(Token* start,
243                           Token* end,
244                           SortDirection direction) const = 0;
245 
246   // Removes all indices pointing to values that are duplicates, as a result the
247   // indices will only point to distinct (not duplicated) values.
248   //
249   // Notes for implementors:
250   // * Each layer that might introduce duplicates is responsible for removing
251   // them.
252   virtual void Distinct(Indices&) const = 0;
253 
254   // After calling this function Indices will have at most one element. If
255   // present it will point to the first index with the largest value in the
256   // chain.
257   virtual std::optional<Token> MaxElement(Indices&) const = 0;
258 
259   // After calling this function Indices will have at most one element. If
260   // present it will point to the first index with the smallest value in the
261   // chain.
262   virtual std::optional<Token> MinElement(Indices&) const = 0;
263 
264   // Returns a string which represents the column for debugging purposes.
265   //
266   // Warning: the format of the string returned by this class is *not* stable
267   // and should be relied upon for anything except printing for debugging
268   // purposes.
269   virtual std::string DebugString() const = 0;
270 
271   // Number of elements in stored data.
272   virtual uint32_t size() const = 0;
273 
274   // End of public API.
275   // The below methods might be public but are only intended for implementations
276   // of DataLayerChain.
277 
278   // Verifies whether any further filtering is needed and if not, whether the
279   // search would return all values or none of them. This allows for skipping
280   // the |Search| and |IndexSearch| in special cases.
281   //
282   // Notes for callers:
283   // * The SqlValue and FilterOp have to be valid in Sqlite: it will crash if
284   //   either: value is NULL and operation is different than "IS NULL" and "IS
285   //   NOT NULL" or the operation is "IS NULL" or "IS NOT NULL" and value is
286   //   different than NULL.
287   virtual SearchValidationResult ValidateSearchConstraints(FilterOp,
288                                                            SqlValue) const = 0;
289 
290   // Post-validated implementation of |Search|. See |Search|'s documentation.
291   virtual RangeOrBitVector SearchValidated(FilterOp, SqlValue, Range) const = 0;
292 
293   // Post-validated implementation of |IndexSearch|. See |IndexSearch|'s
294   // documentation.
295   virtual void IndexSearchValidated(FilterOp, SqlValue, Indices&) const = 0;
296 
297   // Post-validated implementation of |OrderedIndexSearch|. See
298   // |OrderedIndexSearch|'s documentation.
299   Range OrderedIndexSearchValidated(FilterOp op,
300                                     SqlValue value,
301                                     const OrderedIndices& indices) const;
302 
303   // Returns the SqlValue representing the value at a given index.
304   //
305   // This function might be very tempting to use as it appears cheap on the
306   // surface but because of how DataLayerChains might be layered on top of each
307   // other, this might require *several* virtual function calls per index.
308   // If you're tempted to use this, please consider instead create a new
309   // "vectorized" function instead and only using this as a last resort.
310   //
311   // The correct "class" of algorithms to use this function are cases where you
312   // have a set of indices you want to lookup and based on the value returned
313   // you will only use a fraction of them. In this case, it might be worth
314   // paying the non-vectorized lookup to vastly reduce how many indices need
315   // to be translated.
316   //
317   // An example of such an algorithm is binary search on indices.
318   virtual SqlValue Get_AvoidUsingBecauseSlow(uint32_t index) const = 0;
319 };
320 
321 }  // namespace perfetto::trace_processor::column
322 
323 #endif  // SRC_TRACE_PROCESSOR_DB_COLUMN_DATA_LAYER_H_
324