1 /* 2 * Copyright (C) 2023 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_TRACE_PROCESSOR_DB_COLUMN_DATA_LAYER_H_ 18 #define SRC_TRACE_PROCESSOR_DB_COLUMN_DATA_LAYER_H_ 19 20 #include <cstdint> 21 #include <memory> 22 #include <optional> 23 #include <string> 24 #include <utility> 25 #include <vector> 26 27 #include "perfetto/base/logging.h" 28 #include "perfetto/public/compiler.h" 29 #include "perfetto/trace_processor/basic_types.h" 30 #include "perfetto/trace_processor/ref_counted.h" 31 #include "src/trace_processor/db/column/types.h" 32 33 namespace perfetto::trace_processor::column { 34 class DataLayerChain; 35 36 // Data structure which either directly or indirectly (i.e. by transforming 37 // the contents of another DataLayer) provides the data of a column of a 38 // table. 39 class DataLayer : public RefCounted { 40 public: 41 // Arguments for MakeChain on how the inner chain should be interpreted. 42 struct ChainCreationArgs { 43 constexpr explicit ChainCreationArgs( 44 bool _does_layer_order_chain_contents = false) does_layer_order_chain_contentsChainCreationArgs45 : does_layer_order_chain_contents(_does_layer_order_chain_contents) {} 46 47 // Indicates whether the current data layer orders the inner chain. 48 // Currently used by ArrangementOverlay to decide whether the arrangement 49 // orders a given chain. 50 bool does_layer_order_chain_contents; 51 }; 52 virtual ~DataLayer(); 53 54 // Creates a DataLayerChain for a terminal DataLayer. This means the 55 // DataLayer directly should return the data it contains inside. 56 std::unique_ptr<DataLayerChain> MakeChain(); 57 58 // Creates a DataLayerChain for a non-terminal DataLayer. This means 59 // the DataLayer should transform the contents of the inner chain. 60 std::unique_ptr<DataLayerChain> MakeChain( 61 std::unique_ptr<DataLayerChain>, 62 ChainCreationArgs = ChainCreationArgs()); 63 64 protected: 65 // TODO(b/325583551): remove this when possible. 66 enum class Impl { 67 kArrangement, 68 kDenseNull, 69 kDummy, 70 kId, 71 kNull, 72 kNumericDouble, 73 kNumericUint32, 74 kNumericInt32, 75 kNumericInt64, 76 kRange, 77 kSelector, 78 kSetId, 79 kString, 80 }; DataLayer(Impl impl)81 explicit DataLayer(Impl impl) : impl_(impl) {} 82 83 private: 84 Impl impl_; 85 }; 86 87 // Corresponds to a series of DataLayer chained together. Provides 88 // functionality for querying the transformed data of the entire chain. 89 class DataLayerChain { 90 public: 91 // Index vector related data required to Filter using IndexSearch. 92 struct Indices { 93 enum class State { 94 // We can't guarantee that data is in monotonic order. 95 kNonmonotonic, 96 // Data is in monotonic order. 97 kMonotonic, 98 }; CreateIndices99 static Indices Create(const std::vector<uint32_t>& raw, State state) { 100 std::vector<Token> tokens; 101 tokens.reserve(raw.size()); 102 for (auto r : raw) { 103 tokens.push_back({r, r}); 104 } 105 return Indices{std::move(tokens), state}; 106 } CreateWithIndexPayloadForTestingIndices107 static Indices CreateWithIndexPayloadForTesting( 108 const std::vector<uint32_t>& raw, 109 State state) { 110 std::vector<Token> tokens; 111 tokens.reserve(raw.size()); 112 for (uint32_t i = 0; i < raw.size(); ++i) { 113 tokens.push_back(Token{raw[i], i}); 114 } 115 return Indices{std::move(tokens), state}; 116 } 117 std::vector<Token> tokens; 118 State state = State::kNonmonotonic; 119 }; 120 121 // Index vector related data required to Filter using IndexSearch. 122 struct OrderedIndices { 123 const uint32_t* data = nullptr; 124 uint32_t size = 0; 125 Indices::State state = Indices::State::kNonmonotonic; 126 }; 127 128 virtual ~DataLayerChain(); 129 130 // Start of public API. 131 132 // Checks whether element at the provided index match |op| and |value|. 133 // 134 // Returns true if the element matches, false otherwise. 135 virtual SingleSearchResult SingleSearch(FilterOp op, 136 SqlValue value, 137 uint32_t row) const = 0; 138 139 // Searches for elements which match |op| and |value| between |range.start| 140 // and |range.end|. 141 // 142 // Returns either a range or BitVector which indicate the positions in 143 // |range| which match the constraint. If a BitVector is returned, it will 144 // be *precisely* as large as |range.end|. 145 // 146 // Notes for callers: 147 // * Callers should note that the return value of this function corresponds 148 // to positions in the storage. 149 // 150 // Notes for implementors: 151 // * Implementations should ensure that the return value is empty or *only* 152 // includes positions in |range|. Callers are free to assume this and can 153 // optimize based on it. 154 // * Implementations should ensure that, if they return a BitVector, it is 155 // precisely of size |range.end|. Search(FilterOp op,SqlValue value,Range range)156 PERFETTO_ALWAYS_INLINE RangeOrBitVector Search(FilterOp op, 157 SqlValue value, 158 Range range) const { 159 PERFETTO_DCHECK(range.end <= size()); 160 switch (ValidateSearchConstraints(op, value)) { 161 case SearchValidationResult::kAllData: 162 return RangeOrBitVector(range); 163 case SearchValidationResult::kNoData: 164 return RangeOrBitVector(Range()); 165 case SearchValidationResult::kOk: 166 return SearchValidated(op, value, range); 167 } 168 PERFETTO_FATAL("For GCC"); 169 } 170 171 // Searches for elements which match |op| and |value| at the positions given 172 // by |indices| array. 173 // 174 // Returns either a range of BitVector which indicate the positions in 175 // |indices| which match the constraint. If a BitVector is returned, it will 176 // be *precisely* as large as |indices_count|. 177 // 178 // Notes for callers: 179 // * Callers should note that the return value of this function corresponds 180 // to positions in |indices| *not* positions in the storage. 181 // 182 // Notes for implementors: 183 // * Implementations should ensure that, if they return a BitVector, it is 184 // precisely of size |indices_count|. IndexSearch(FilterOp op,SqlValue value,Indices & indices)185 PERFETTO_ALWAYS_INLINE void IndexSearch(FilterOp op, 186 SqlValue value, 187 Indices& indices) const { 188 switch (ValidateSearchConstraints(op, value)) { 189 case SearchValidationResult::kAllData: 190 return; 191 case SearchValidationResult::kNoData: 192 indices.tokens.clear(); 193 return; 194 case SearchValidationResult::kOk: 195 IndexSearchValidated(op, value, indices); 196 return; 197 } 198 PERFETTO_FATAL("For GCC"); 199 } 200 201 // Searches for elements which match |op| and |value| at the positions given 202 // by OrderedIndicesdata. 203 // 204 // Returns a Range into OrderedIndicesdata of OrderedIndicesthat pass the 205 // constraint. 206 // 207 // Notes for callers: 208 // * Should not be called on: 209 // - kGlob and kRegex as those operations can't use the sorted state 210 // hence they can't return a Range. 211 // - kNe as this is inherently unsorted. Use kEq and then reverse the 212 // result. 213 // * Callers should note that the return value of this function corresponds 214 // to positions in |indices| *not* positions in the storage. 215 PERFETTO_ALWAYS_INLINE Range OrderedIndexSearch(FilterOp op,SqlValue value,const OrderedIndices & indices)216 OrderedIndexSearch(FilterOp op, 217 SqlValue value, 218 const OrderedIndices& indices) const { 219 switch (ValidateSearchConstraints(op, value)) { 220 case SearchValidationResult::kAllData: 221 return {0, indices.size}; 222 case SearchValidationResult::kNoData: 223 return {}; 224 case SearchValidationResult::kOk: 225 return OrderedIndexSearchValidated(op, value, indices); 226 } 227 PERFETTO_FATAL("For GCC"); 228 } 229 230 // Stable sorts an array of Token elements between |start| and |end| 231 // using a comparator defined by looking up the elements in this chain using 232 // the index given by Token::index. |direction| indicates the direction of 233 // the sort (ascending or descending). 234 // 235 // In simple terms the expectation is for implementations do something like: 236 // ``` 237 // std::stable_sort(start, index, [](const Token& a, const Token& b) { 238 // return Get(a.index) < Get(b.index); 239 // }); 240 // ``` 241 // with |Get| being a function to lookup the element in this chain. 242 virtual void StableSort(Token* start, 243 Token* end, 244 SortDirection direction) const = 0; 245 246 // Removes all indices pointing to values that are duplicates, as a result the 247 // indices will only point to distinct (not duplicated) values. 248 // 249 // Notes for implementors: 250 // * Each layer that might introduce duplicates is responsible for removing 251 // them. 252 virtual void Distinct(Indices&) const = 0; 253 254 // After calling this function Indices will have at most one element. If 255 // present it will point to the first index with the largest value in the 256 // chain. 257 virtual std::optional<Token> MaxElement(Indices&) const = 0; 258 259 // After calling this function Indices will have at most one element. If 260 // present it will point to the first index with the smallest value in the 261 // chain. 262 virtual std::optional<Token> MinElement(Indices&) const = 0; 263 264 // Returns a string which represents the column for debugging purposes. 265 // 266 // Warning: the format of the string returned by this class is *not* stable 267 // and should be relied upon for anything except printing for debugging 268 // purposes. 269 virtual std::string DebugString() const = 0; 270 271 // Number of elements in stored data. 272 virtual uint32_t size() const = 0; 273 274 // End of public API. 275 // The below methods might be public but are only intended for implementations 276 // of DataLayerChain. 277 278 // Verifies whether any further filtering is needed and if not, whether the 279 // search would return all values or none of them. This allows for skipping 280 // the |Search| and |IndexSearch| in special cases. 281 // 282 // Notes for callers: 283 // * The SqlValue and FilterOp have to be valid in Sqlite: it will crash if 284 // either: value is NULL and operation is different than "IS NULL" and "IS 285 // NOT NULL" or the operation is "IS NULL" or "IS NOT NULL" and value is 286 // different than NULL. 287 virtual SearchValidationResult ValidateSearchConstraints(FilterOp, 288 SqlValue) const = 0; 289 290 // Post-validated implementation of |Search|. See |Search|'s documentation. 291 virtual RangeOrBitVector SearchValidated(FilterOp, SqlValue, Range) const = 0; 292 293 // Post-validated implementation of |IndexSearch|. See |IndexSearch|'s 294 // documentation. 295 virtual void IndexSearchValidated(FilterOp, SqlValue, Indices&) const = 0; 296 297 // Post-validated implementation of |OrderedIndexSearch|. See 298 // |OrderedIndexSearch|'s documentation. 299 Range OrderedIndexSearchValidated(FilterOp op, 300 SqlValue value, 301 const OrderedIndices& indices) const; 302 303 // Returns the SqlValue representing the value at a given index. 304 // 305 // This function might be very tempting to use as it appears cheap on the 306 // surface but because of how DataLayerChains might be layered on top of each 307 // other, this might require *several* virtual function calls per index. 308 // If you're tempted to use this, please consider instead create a new 309 // "vectorized" function instead and only using this as a last resort. 310 // 311 // The correct "class" of algorithms to use this function are cases where you 312 // have a set of indices you want to lookup and based on the value returned 313 // you will only use a fraction of them. In this case, it might be worth 314 // paying the non-vectorized lookup to vastly reduce how many indices need 315 // to be translated. 316 // 317 // An example of such an algorithm is binary search on indices. 318 virtual SqlValue Get_AvoidUsingBecauseSlow(uint32_t index) const = 0; 319 }; 320 321 } // namespace perfetto::trace_processor::column 322 323 #endif // SRC_TRACE_PROCESSOR_DB_COLUMN_DATA_LAYER_H_ 324