xref: /aosp_15_r20/external/perfetto/src/trace_processor/containers/row_map.h (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_PROCESSOR_CONTAINERS_ROW_MAP_H_
18 #define SRC_TRACE_PROCESSOR_CONTAINERS_ROW_MAP_H_
19 
20 #include <algorithm>
21 #include <cstdint>
22 #include <iterator>
23 #include <numeric>
24 #include <optional>
25 #include <utility>
26 #include <variant>
27 #include <vector>
28 
29 #include "perfetto/base/compiler.h"
30 #include "perfetto/base/logging.h"
31 #include "src/trace_processor/containers/bit_vector.h"
32 
33 namespace perfetto {
34 namespace trace_processor {
35 
36 // Stores a list of row indicies in a space efficient manner. One or more
37 // columns can refer to the same RowMap. The RowMap defines the access pattern
38 // to iterate on rows.
39 //
40 // Naming convention:
41 //
42 // As both the input and output of RowMap is a uint32_t, it can be quite
43 // confusing to reason about what parameters/return values of the functions
44 // of RowMap actually means. To help with this, we define a strict convention
45 // of naming.
46 //
47 // row:     input - that is, rows are what are passed into operator[]; named as
48 //          such because a "row" number in a table is converted to an index to
49 //          lookup in the backing vectors.
50 // index:   output - that is, indices are what are returned from operator[];
51 //          named as such because an "index" is what's used to lookup data
52 //          from the backing vectors.
53 //
54 // Implementation details:
55 //
56 // Behind the scenes, this class is impelemented using one of three backing
57 // data-structures:
58 // 1. A start and end index (internally named 'range')
59 // 1. BitVector
60 // 2. std::vector<uint32_t> (internally named IndexVector).
61 //
62 // Generally the preference for data structures is range > BitVector >
63 // std::vector<uint32>; this ordering is based mainly on memory efficiency as we
64 // expect RowMaps to be large.
65 //
66 // However, BitVector and std::vector<uint32_t> allow things which are not
67 // possible with the data-structures preferred to them:
68 //  * a range (as the name suggests) can only store a compact set of indices
69 //  with no holes. A BitVector works around this limitation by storing a 1 at an
70 //  index where that row is part of the RowMap and 0 otherwise.
71 //  * as soon as ordering or duplicate rows come into play, we cannot use a
72 //   BitVector anymore as ordering/duplicate row information cannot be captured
73 //   by a BitVector.
74 //
75 // For small, sparse RowMaps, it is possible that a std::vector<uint32_t> is
76 // more efficient than a BitVector; in this case, we will make a best effort
77 // switch to it but the cases where this happens is not precisely defined.
78 class RowMap {
79  public:
80   using InputRow = uint32_t;
81   using OutputIndex = uint32_t;
82   using IndexVector = std::vector<OutputIndex>;
83 
84   struct Range {
RangeRange85     Range(OutputIndex start_index, OutputIndex end_index)
86         : start(start_index), end(end_index) {
87       PERFETTO_DCHECK(start_index <= end_index);
88     }
RangeRange89     Range() : start(0), end(0) {}
90 
91     OutputIndex start;  // This is an inclusive index.
92     OutputIndex end;    // This is an exclusive index.
93 
emptyRange94     bool empty() const { return size() == 0; }
sizeRange95     uint32_t size() const { return end - start; }
ContainsRange96     inline bool Contains(uint32_t val) const {
97       return val >= start && val < end;
98     }
99   };
100 
101   // Allows efficient iteration over the rows of a RowMap.
102   //
103   // Note: you should usually prefer to use the methods on RowMap directly (if
104   // they exist for the task being attempted) to avoid the lookup for the mode
105   // of the RowMap on every method call.
106   class Iterator {
107    public:
108     explicit Iterator(const RowMap* rm);
109 
110     Iterator(const Iterator&) = delete;
111     Iterator& operator=(const Iterator&) = delete;
112 
113     Iterator(Iterator&&) noexcept = default;
114     Iterator& operator=(Iterator&&) = default;
115 
116     // Forwards the iterator to the next row of the RowMap.
Next()117     void Next() { ++ordinal_; }
118 
119     // Returns if the iterator is still valid.
120     explicit operator bool() const {
121       if (const auto* range = std::get_if<Range>(&rm_->data_)) {
122         return ordinal_ < range->end;
123       }
124       if (std::get_if<BitVector>(&rm_->data_)) {
125         return ordinal_ < results_.size();
126       }
127       if (const auto* vec = std::get_if<IndexVector>(&rm_->data_)) {
128         return ordinal_ < vec->size();
129       }
130       PERFETTO_FATAL("Didn't match any variant type.");
131     }
132 
133     // Returns the index pointed to by this iterator.
index()134     OutputIndex index() const {
135       if (std::get_if<Range>(&rm_->data_)) {
136         return ordinal_;
137       }
138       if (std::get_if<BitVector>(&rm_->data_)) {
139         return results_[ordinal_];
140       }
141       if (const auto* vec = std::get_if<IndexVector>(&rm_->data_)) {
142         return (*vec)[ordinal_];
143       }
144       PERFETTO_FATAL("Didn't match any variant type.");
145     }
146 
147     // Returns the row of the index the iterator points to.
row()148     InputRow row() const {
149       if (const auto* range = std::get_if<Range>(&rm_->data_)) {
150         return ordinal_ - range->start;
151       }
152       if (std::get_if<BitVector>(&rm_->data_) ||
153           std::get_if<IndexVector>(&rm_->data_)) {
154         return ordinal_;
155       }
156       PERFETTO_FATAL("Didn't match any variant type.");
157     }
158 
159    private:
160     // Ordinal will not be used for BitVector based RowMap.
161     uint32_t ordinal_ = 0;
162     // Not empty for BitVector based RowMap.
163     std::vector<uint32_t> results_;
164 
165     const RowMap* rm_ = nullptr;
166   };
167 
168   // Creates an empty RowMap.
169   // By default this will be implemented using a range.
170   RowMap();
171 
172   // Creates a RowMap containing the range of indices between |start| and |end|
173   // i.e. all indices between |start| (inclusive) and |end| (exclusive).
174   RowMap(OutputIndex start, OutputIndex end);
175 
176   // Creates a RowMap backed by a BitVector.
177   explicit RowMap(BitVector);
178 
179   // Creates a RowMap backed by an std::vector<uint32_t>.
180   explicit RowMap(IndexVector);
181 
182   RowMap(const RowMap&) noexcept = delete;
183   RowMap& operator=(const RowMap&) = delete;
184 
185   RowMap(RowMap&&) noexcept = default;
186   RowMap& operator=(RowMap&&) = default;
187 
188   // Creates a RowMap containing just |index|.
189   // By default this will be implemented using a range.
SingleRow(OutputIndex index)190   static RowMap SingleRow(OutputIndex index) {
191     return RowMap(index, index + 1);
192   }
193 
194   // Creates a copy of the RowMap.
195   // We have an explicit copy function because RowMap can hold onto large chunks
196   // of memory and we want to be very explicit when making a copy to avoid
197   // accidental leaks and copies.
198   RowMap Copy() const;
199 
200   // Returns the size of the RowMap; that is the number of indices in the
201   // RowMap.
size()202   uint32_t size() const {
203     if (const auto* range = std::get_if<Range>(&data_)) {
204       return range->size();
205     }
206     if (const auto* bv = std::get_if<BitVector>(&data_)) {
207       return bv->CountSetBits();
208     }
209     if (const auto* vec = std::get_if<IndexVector>(&data_)) {
210       return static_cast<uint32_t>(vec->size());
211     }
212     NoVariantMatched();
213   }
214 
215   // Returns whether this rowmap is empty.
empty()216   bool empty() const { return size() == 0; }
217 
218   // Returns the index at the given |row|.
Get(InputRow row)219   OutputIndex Get(InputRow row) const {
220     if (const auto* range = std::get_if<Range>(&data_)) {
221       return GetRange(*range, row);
222     }
223     if (const auto* bv = std::get_if<BitVector>(&data_)) {
224       return GetBitVector(*bv, row);
225     }
226     if (const auto* vec = std::get_if<IndexVector>(&data_)) {
227       return GetIndexVector(*vec, row);
228     }
229     NoVariantMatched();
230   }
231 
232   // Returns the vector of all indices in the RowMap.
GetAllIndices()233   std::vector<OutputIndex> GetAllIndices() const {
234     if (const auto* range = std::get_if<Range>(&data_)) {
235       std::vector<uint32_t> res(range->size());
236       std::iota(res.begin(), res.end(), range->start);
237       return res;
238     }
239     if (const auto* bv = std::get_if<BitVector>(&data_)) {
240       return bv->GetSetBitIndices();
241     }
242     if (const auto* vec = std::get_if<IndexVector>(&data_)) {
243       return *vec;
244     }
245     NoVariantMatched();
246   }
247 
248   // Returns maximum size of the output. Ie range.end or size of the BV.
249   OutputIndex Max() const;
250 
251   // Returns whether the RowMap contains the given index.
Contains(OutputIndex index)252   bool Contains(OutputIndex index) const {
253     if (const auto* range = std::get_if<Range>(&data_)) {
254       return index >= range->start && index < range->end;
255     }
256     if (const auto* bv = std::get_if<BitVector>(&data_)) {
257       return index < bv->size() && bv->IsSet(index);
258     }
259     if (const auto* vec = std::get_if<IndexVector>(&data_)) {
260       return std::find(vec->begin(), vec->end(), index) != vec->end();
261     }
262     NoVariantMatched();
263   }
264 
265   // Returns the first row of the given |index| in the RowMap.
RowOf(OutputIndex index)266   std::optional<InputRow> RowOf(OutputIndex index) const {
267     if (const auto* range = std::get_if<Range>(&data_)) {
268       if (index < range->start || index >= range->end)
269         return std::nullopt;
270       return index - range->start;
271     }
272     if (const auto* bv = std::get_if<BitVector>(&data_)) {
273       return index < bv->size() && bv->IsSet(index)
274                  ? std::make_optional(bv->CountSetBits(index))
275                  : std::nullopt;
276     }
277     if (const auto* vec = std::get_if<IndexVector>(&data_)) {
278       auto it = std::find(vec->begin(), vec->end(), index);
279       return it != vec->end() ? std::make_optional(static_cast<InputRow>(
280                                     std::distance(vec->begin(), it)))
281                               : std::nullopt;
282     }
283     NoVariantMatched();
284   }
285 
286   // Performs an ordered insert of the index into the current RowMap
287   // (precondition: this RowMap is ordered based on the indices it contains).
288   //
289   // Example:
290   // this = [1, 5, 10, 11, 20]
291   // Insert(10)  // this = [1, 5, 10, 11, 20]
292   // Insert(12)  // this = [1, 5, 10, 11, 12, 20]
293   // Insert(21)  // this = [1, 5, 10, 11, 12, 20, 21]
294   // Insert(2)   // this = [1, 2, 5, 10, 11, 12, 20, 21]
295   //
296   // Speecifically, this means that it is only valid to call Insert on a RowMap
297   // which is sorted by the indices it contains; this is automatically true when
298   // the RowMap is in range or BitVector mode but is a required condition for
299   // IndexVector mode.
Insert(OutputIndex index)300   void Insert(OutputIndex index) {
301     if (auto* range = std::get_if<Range>(&data_)) {
302       if (index == range->end) {
303         // Fast path: if we're just appending to the end
304         // of the range, we can stay in range mode and
305         // just bump the end index.
306         range->end++;
307         return;
308       }
309 
310       // Slow path: the insert is somewhere else other
311       // than the end. This means we need to switch to
312       // using a BitVector instead.
313       BitVector bv;
314       bv.Resize(range->start, false);
315       bv.Resize(range->end, true);
316       InsertIntoBitVector(bv, index);
317       data_ = std::move(bv);
318       return;
319     }
320     if (auto* bv = std::get_if<BitVector>(&data_)) {
321       InsertIntoBitVector(*bv, index);
322       return;
323     }
324     if (auto* vec = std::get_if<IndexVector>(&data_)) {
325       PERFETTO_DCHECK(std::is_sorted(vec->begin(), vec->end()));
326       auto it = std::upper_bound(vec->begin(), vec->end(), index);
327       vec->insert(it, index);
328       return;
329     }
330     NoVariantMatched();
331   }
332 
333   // Updates this RowMap by 'picking' the indices given by |picker|.
334   // This is easiest to explain with an example; suppose we have the following
335   // RowMaps:
336   // this  : [0, 1, 4, 10, 11]
337   // picker: [0, 3, 4, 4, 2]
338   //
339   // After calling Apply(picker), we now have the following:
340   // this  : [0, 10, 11, 11, 4]
341   //
342   // Conceptually, we are performing the following algorithm:
343   // RowMap rm = Copy()
344   // for (p : picker)
345   //   rm[i++] = this[p]
346   // return rm;
SelectRows(const RowMap & selector)347   RowMap SelectRows(const RowMap& selector) const {
348     uint32_t size = selector.size();
349 
350     // If the selector is empty, just return an empty RowMap.
351     if (size == 0u)
352       return {};
353 
354     // If the selector is just picking a single row, just return that row
355     // without any additional overhead.
356     if (size == 1u)
357       return RowMap::SingleRow(Get(selector.Get(0)));
358 
359     // For all other cases, go into the slow-path.
360     return SelectRowsSlow(selector);
361   }
362 
363   // Intersects |this| with |second| independent of underlying structure of both
364   // RowMaps. Modifies |this| to only contain indices present in |second|.
365   void Intersect(const RowMap& second);
366 
367   // Intersects this RowMap with |index|. If this RowMap contained |index|, then
368   // it will *only* contain |index|. Otherwise, it will be empty.
IntersectExact(OutputIndex index)369   void IntersectExact(OutputIndex index) {
370     if (Contains(index)) {
371       *this = RowMap(index, index + 1);
372     } else {
373       Clear();
374     }
375   }
376 
377   // Clears this RowMap by resetting it to a newly constructed state.
Clear()378   void Clear() { *this = RowMap(); }
379 
380   // Converts this RowMap to an index vector in the most efficient way
381   // possible.
TakeAsIndexVector()382   std::vector<uint32_t> TakeAsIndexVector() && {
383     if (const auto* range = std::get_if<Range>(&data_)) {
384       std::vector<uint32_t> rm(range->size());
385       std::iota(rm.begin(), rm.end(), range->start);
386       return rm;
387     }
388     if (const auto* bv = std::get_if<BitVector>(&data_)) {
389       return bv->GetSetBitIndices();
390     }
391     if (auto* vec = std::get_if<IndexVector>(&data_)) {
392       return std::move(*vec);
393     }
394     NoVariantMatched();
395   }
396 
397   // Returns the data in RowMap BitVector, nullptr if RowMap is in a different
398   // mode.
GetIfBitVector()399   const BitVector* GetIfBitVector() const {
400     return std::get_if<BitVector>(&data_);
401   }
402 
403   // Returns the data in RowMap IndexVector, nullptr if RowMap is in a different
404   // mode.
GetIfIndexVector()405   const std::vector<uint32_t>* GetIfIndexVector() const {
406     return std::get_if<IndexVector>(&data_);
407   }
408 
409   // Returns the data in RowMap Range, nullptr if RowMap is in a different
410   // mode.
GetIfIRange()411   const Range* GetIfIRange() const { return std::get_if<Range>(&data_); }
412 
413   // Returns the iterator over the rows in this RowMap.
IterateRows()414   Iterator IterateRows() const { return Iterator(this); }
415 
416   // Returns if the RowMap is internally represented using a range.
IsRange()417   bool IsRange() const { return std::holds_alternative<Range>(data_); }
418 
419   // Returns if the RowMap is internally represented using a BitVector.
IsBitVector()420   bool IsBitVector() const { return std::holds_alternative<BitVector>(data_); }
421 
422   // Returns if the RowMap is internally represented using an index vector.
IsIndexVector()423   bool IsIndexVector() const {
424     return std::holds_alternative<IndexVector>(data_);
425   }
426 
427  private:
428   using Variant = std::variant<Range, BitVector, IndexVector>;
429 
430   explicit RowMap(Range);
431 
432   explicit RowMap(Variant);
433 
GetRange(Range r,InputRow row)434   PERFETTO_ALWAYS_INLINE static OutputIndex GetRange(Range r, InputRow row) {
435     return r.start + row;
436   }
GetBitVector(const BitVector & bv,uint32_t row)437   PERFETTO_ALWAYS_INLINE static OutputIndex GetBitVector(const BitVector& bv,
438                                                          uint32_t row) {
439     return bv.IndexOfNthSet(row);
440   }
GetIndexVector(const IndexVector & vec,uint32_t row)441   PERFETTO_ALWAYS_INLINE static OutputIndex GetIndexVector(
442       const IndexVector& vec,
443       uint32_t row) {
444     return vec[row];
445   }
446 
447   RowMap SelectRowsSlow(const RowMap& selector) const;
448 
InsertIntoBitVector(BitVector & bv,OutputIndex row)449   static void InsertIntoBitVector(BitVector& bv, OutputIndex row) {
450     if (row == bv.size()) {
451       bv.AppendTrue();
452       return;
453     }
454     if (row > bv.size())
455       bv.Resize(row + 1, false);
456     bv.Set(row);
457   }
458 
NoVariantMatched()459   PERFETTO_NORETURN static void NoVariantMatched() {
460     PERFETTO_FATAL("Didn't match any variant type.");
461   }
462 
463   Variant data_;
464 };
465 
466 }  // namespace trace_processor
467 }  // namespace perfetto
468 
469 #endif  // SRC_TRACE_PROCESSOR_CONTAINERS_ROW_MAP_H_
470