xref: /aosp_15_r20/external/perfetto/src/trace_processor/containers/bit_vector.h (revision 6dbdd20afdafa5e3ca9b8809fa73465d530080dc)
1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef SRC_TRACE_PROCESSOR_CONTAINERS_BIT_VECTOR_H_
18 #define SRC_TRACE_PROCESSOR_CONTAINERS_BIT_VECTOR_H_
19 
20 #include <algorithm>
21 #include <cstdint>
22 #include <initializer_list>
23 #include <iterator>
24 #include <utility>
25 #include <vector>
26 
27 #include "perfetto/base/compiler.h"
28 #include "perfetto/base/logging.h"
29 #include "perfetto/public/compiler.h"
30 
31 namespace perfetto {
32 namespace protos::pbzero {
33 class SerializedColumn_BitVector;
34 class SerializedColumn_BitVector_Decoder;
35 }  // namespace protos::pbzero
36 
37 namespace trace_processor {
38 namespace internal {
39 
40 class BaseIterator;
41 class SetBitsIterator;
42 
43 }  // namespace internal
44 
45 // A BitVector which compactly stores a vector of bools using a single bit
46 // for each bool.
47 class BitVector {
48  public:
49   static constexpr uint32_t kBitsInWord = 64;
50 
51   // Builder class which allows efficiently creating a BitVector by appending
52   // words. Using this class is generally far more efficient than trying to set
53   // bits directly in a BitVector or even appending one bit at a time.
54   class Builder {
55    public:
56     // Creates a Builder for building a BitVector of |size| bits.
57     explicit Builder(uint32_t size, uint32_t skip = 0)
words_(BlockCount (size)* Block::kWords)58         : words_(BlockCount(size) * Block::kWords),
59           global_bit_offset_(skip),
60           size_(size),
61           skipped_blocks_(skip / Block::kBits) {
62       PERFETTO_CHECK(global_bit_offset_ <= size_);
63     }
64 
65     // Appends a single bit to the builder.
66     // Note: |AppendWord| is far more efficient than this method so should be
67     // preferred.
Append(bool value)68     void Append(bool value) {
69       PERFETTO_DCHECK(global_bit_offset_ < size_);
70 
71       words_[global_bit_offset_ / BitWord::kBits] |=
72           static_cast<uint64_t>(value) << global_bit_offset_ % BitWord::kBits;
73       global_bit_offset_++;
74     }
75 
76     // Appends a whole word to the Builder. Builder has to end on a word
77     // boundary before calling this function.
AppendWord(uint64_t word)78     void AppendWord(uint64_t word) {
79       PERFETTO_DCHECK(global_bit_offset_ % BitWord::kBits == 0);
80       PERFETTO_DCHECK(global_bit_offset_ + BitWord::kBits <= size_);
81 
82       words_[global_bit_offset_ / BitWord::kBits] = word;
83       global_bit_offset_ += BitWord::kBits;
84     }
85 
86     // Creates a BitVector from this Builder.
Build()87     BitVector Build() && {
88       if (size_ == 0)
89         return {};
90 
91       std::vector<uint32_t> counts(BlockCount(size_));
92       PERFETTO_CHECK(skipped_blocks_ <= counts.size());
93       for (uint32_t i = skipped_blocks_ + 1; i < counts.size(); ++i) {
94         counts[i] = counts[i - 1] +
95                     ConstBlock(&words_[Block::kWords * (i - 1)]).CountSetBits();
96       }
97       return {std::move(words_), std::move(counts), size_};
98     }
99 
100     // Returns the number of bits which are in complete words which can be
101     // appended to this builder before having to fallback to |Append| due to
102     // being close to the end.
BitsInCompleteWordsUntilFull()103     uint32_t BitsInCompleteWordsUntilFull() const {
104       uint32_t next_word = WordCount(global_bit_offset_);
105       uint32_t end_word = WordFloor(size_);
106       uint32_t complete_words = next_word < end_word ? end_word - next_word : 0;
107       return complete_words * BitWord::kBits;
108     }
109 
110     // Returns the number of bits which should be appended using |Append| either
111     // hitting a word boundary (and thus able to use |AppendWord|) or until the
112     // BitVector is full (i.e. no more Appends should happen), whichever would
113     // happen first.
BitsUntilWordBoundaryOrFull()114     uint32_t BitsUntilWordBoundaryOrFull() const {
115       if (global_bit_offset_ == 0 && size_ < BitWord::kBits) {
116         return size_;
117       }
118       uint8_t word_bit_offset = global_bit_offset_ % BitWord::kBits;
119       return std::min(BitsUntilFull(),
120                       (BitWord::kBits - word_bit_offset) % BitWord::kBits);
121     }
122 
123     // Returns the number of bits which should be appended using |Append| before
124     // hitting a word boundary (and thus able to use |AppendWord|) or until the
125     // BitVector is full (i.e. no more Appends should happen).
BitsUntilFull()126     uint32_t BitsUntilFull() const { return size_ - global_bit_offset_; }
127 
128    private:
129     std::vector<uint64_t> words_;
130     uint32_t global_bit_offset_ = 0;
131     uint32_t size_ = 0;
132     uint32_t skipped_blocks_ = 0;
133   };
134 
135   // Creates an empty BitVector.
136   BitVector();
137 
138   BitVector(std::initializer_list<bool> init);
139 
140   // Creates a BitVector of |count| size filled with |value|.
141   explicit BitVector(uint32_t count, bool value = false);
142 
143   BitVector(const BitVector&) = delete;
144   BitVector& operator=(const BitVector&) = delete;
145 
146   // Enable moving BitVectors as they have no unmovable state.
147   BitVector(BitVector&&) noexcept = default;
148   BitVector& operator=(BitVector&&) = default;
149 
150   // Create a copy of the BitVector.
151   BitVector Copy() const;
152 
153   // Bitwise Not of the BitVector.
154   void Not();
155 
156   // Bitwise Or of the BitVector.
157   void Or(const BitVector&);
158 
159   // Bitwise And of the BitVector.
160   void And(const BitVector&);
161 
162   // Returns the size of the BitVector.
size()163   uint32_t size() const { return static_cast<uint32_t>(size_); }
164 
165   // Returns whether the bit at |idx| is set.
IsSet(uint32_t idx)166   bool IsSet(uint32_t idx) const {
167     PERFETTO_DCHECK(idx < size());
168     return ConstBitWord(&words_[WordFloor(idx)]).IsSet(idx % BitWord::kBits);
169   }
170 
171   // Returns the number of set bits in the BitVector.
CountSetBits()172   uint32_t CountSetBits() const { return CountSetBits(size()); }
173 
174   // Returns the number of set bits between the start of the BitVector
175   // (inclusive) and the index |end| (exclusive).
CountSetBits(uint32_t end)176   uint32_t CountSetBits(uint32_t end) const {
177     if (end == 0)
178       return 0;
179 
180     // Although the external interface we present uses an exclusive |end|,
181     // internally it's a lot nicer to work with an inclusive |end| (mainly
182     // because we get block rollovers on exclusive ends which means we need
183     // to have if checks to ensure we don't overflow the number of blocks).
184     Address addr = IndexToAddress(end - 1);
185 
186     // Add the number of set bits until the start of the block to the number
187     // of set bits until the end address inside the block.
188     return counts_[addr.block_idx] +
189            ConstBlockFromIndex(addr.block_idx).CountSetBits(addr.block_offset);
190   }
191 
192   // Returns the index of the |n|th set bit. Should only be called with |n| <
193   // CountSetBits().
IndexOfNthSet(uint32_t n)194   uint32_t IndexOfNthSet(uint32_t n) const {
195     PERFETTO_DCHECK(n < CountSetBits());
196 
197     // First search for the block which, up until the start of it, has more than
198     // n bits set. Note that this should never return |counts.begin()| as
199     // that should always be 0.
200     // TODO(lalitm): investigate whether we can make this faster with small
201     // binary search followed by a linear search instead of binary searching the
202     // full way.
203     auto it = std::upper_bound(counts_.begin(), counts_.end(), n);
204     PERFETTO_DCHECK(it != counts_.begin());
205 
206     // Go back one block to find the block which has the bit we are looking for.
207     uint32_t block_idx =
208         static_cast<uint32_t>(std::distance(counts_.begin(), it) - 1);
209 
210     // Figure out how many set bits forward we are looking inside the block
211     // by taking away the number of bits at the start of the block from n.
212     uint32_t set_in_block = n - counts_[block_idx];
213 
214     // Compute the address of the bit in the block then convert the full
215     // address back to an index.
216     BlockOffset block_offset =
217         ConstBlockFromIndex(block_idx).IndexOfNthSet(set_in_block);
218     return AddressToIndex(Address{block_idx, block_offset});
219   }
220 
221   // Sets the bit at index |idx| to true and returns the previous value.
Set(uint32_t idx)222   bool Set(uint32_t idx) {
223     // Set the bit to the correct value inside the block but store the old
224     // bit to help fix the counts.
225     auto addr = IndexToAddress(idx);
226     bool old_value =
227         ConstBlockFromIndex(addr.block_idx).IsSet(addr.block_offset);
228 
229     // If the old value was unset, set the bit and add one to the count.
230     if (PERFETTO_LIKELY(!old_value)) {
231       BlockFromIndex(addr.block_idx).Set(addr.block_offset);
232 
233       auto size = static_cast<uint32_t>(counts_.size());
234       for (uint32_t i = addr.block_idx + 1; i < size; ++i) {
235         counts_[i]++;
236       }
237     }
238     return old_value;
239   }
240 
241   // Sets the bit at index |idx| to false.
Clear(uint32_t idx)242   void Clear(uint32_t idx) {
243     // Set the bit to the correct value inside the block but store the old
244     // bit to help fix the counts.
245     auto addr = IndexToAddress(idx);
246     bool old_value =
247         ConstBlockFromIndex(addr.block_idx).IsSet(addr.block_offset);
248 
249     // If the old value was set, clear the bit and subtract one from all the
250     // counts.
251     if (PERFETTO_LIKELY(old_value)) {
252       BlockFromIndex(addr.block_idx).Clear(addr.block_offset);
253 
254       auto size = static_cast<uint32_t>(counts_.size());
255       for (uint32_t i = addr.block_idx + 1; i < size; ++i) {
256         counts_[i]--;
257       }
258     }
259   }
260 
261   // Appends true to the BitVector.
AppendTrue()262   void AppendTrue() {
263     AppendFalse();
264     Address addr = IndexToAddress(size() - 1);
265     BlockFromIndex(addr.block_idx).Set(addr.block_offset);
266   }
267 
268   // Appends false to the BitVector.
AppendFalse()269   void AppendFalse() {
270     Address addr = IndexToAddress(size_);
271     uint32_t old_blocks_size = BlockCount();
272     uint32_t new_blocks_size = addr.block_idx + 1;
273 
274     if (PERFETTO_UNLIKELY(new_blocks_size > old_blocks_size)) {
275       uint32_t t = CountSetBits();
276       words_.resize(words_.size() + Block::kWords);
277       counts_.emplace_back(t);
278     }
279 
280     size_++;
281     // We don't need to clear the bit as we ensure that anything after
282     // size_ is always set to false.
283   }
284 
285   // Resizes the BitVector to the given |size|.
286   // Truncates the BitVector if |size| < |size()| or fills the new space with
287   // |filler| if |size| > |size()|. Calling this method is a noop if |size| ==
288   // |size()|.
289   void Resize(uint32_t new_size, bool filler = false);
290 
291   // Creates a BitVector of size |end| with the bits between |start| and |end|
292   // filled by calling the filler function |f(index of bit)|.
293   //
294   // As an example, suppose RangeForTesting(3, 7, [](x) { return x < 5 }). This
295   // would result in the following BitVector: [0 0 0 1 1 0 0]
296   template <typename Filler = bool(uint32_t)>
RangeForTesting(uint32_t start,uint32_t end,Filler f)297   PERFETTO_WARN_UNUSED_RESULT static BitVector RangeForTesting(uint32_t start,
298                                                                uint32_t end,
299                                                                Filler f) {
300     // Compute the block index and BitVector index where we start and end
301     // working one block at a time.
302     uint32_t start_fast_block = BlockCount(start);
303     uint32_t start_fast_idx = BlockToIndex(start_fast_block);
304     BitVector bv(start, false);
305 
306     // Minimum value of start_fast_idx is numer of bits in block, so we need to
307     // seperate calculation for shorter ranges.
308     if (start_fast_idx > end) {
309       for (uint32_t i = start; i < end; ++i) {
310         bv.Append(f(i));
311       }
312       return bv;
313     }
314 
315     uint32_t end_fast_block = BlockFloor(end);
316     uint32_t end_fast_idx = BlockToIndex(end_fast_block);
317 
318     // Fill up to |start_fast_index| with values from the filler.
319     for (uint32_t i = start; i < start_fast_idx; ++i) {
320       bv.Append(f(i));
321     }
322 
323     // Assert words_ vector is full and size_ is properly calculated.
324     PERFETTO_DCHECK(bv.words_.size() % Block::kWords == 0);
325     PERFETTO_DCHECK(bv.words_.size() * BitWord::kBits == bv.size_);
326 
327     // At this point we can work one block at a time.
328     bv.words_.resize(bv.words_.size() +
329                      Block::kWords * (end_fast_block - start_fast_block));
330     for (uint32_t i = start_fast_block; i < end_fast_block; ++i) {
331       uint64_t* block_start_word = &bv.words_[i * Block::kWords];
332       Block(block_start_word).FromFiller(bv.size_, f);
333       bv.counts_.emplace_back(bv.CountSetBits());
334       bv.size_ += Block::kBits;
335     }
336 
337     // Add the last few elements to finish up to |end|.
338     for (uint32_t i = end_fast_idx; i < end; ++i) {
339       bv.Append(f(i));
340     }
341 
342     return bv;
343   }
344 
345   // Creates BitVector from a vector of sorted indices. Set bits in the
346   // resulting BitVector are values from the index vector.
347   // Note for callers - the passed index vector has to:
348   // - be sorted
349   // - have first element >= 0
350   // - last value smaller than numeric limit of uint32_t.
351   PERFETTO_WARN_UNUSED_RESULT static BitVector FromSortedIndexVector(
352       const std::vector<int64_t>&);
353 
354   // Creates BitVector from a vector of unsorted indices. Set bits in the
355   // resulting BitVector are values from the index vector.
356   PERFETTO_WARN_UNUSED_RESULT static BitVector FromUnsortedIndexVector(
357       const std::vector<uint32_t>&);
358 
359   // Creates a BitVector of size `min(range_end, size())` with bits between
360   // |start| and |end| filled with corresponding bits from |this| BitVector.
361   PERFETTO_WARN_UNUSED_RESULT BitVector
362   IntersectRange(uint32_t range_start, uint32_t range_end) const;
363 
364   // Requests the removal of unused capacity.
365   // Matches the semantics of std::vector::shrink_to_fit.
ShrinkToFit()366   void ShrinkToFit() {
367     words_.shrink_to_fit();
368     counts_.shrink_to_fit();
369   }
370 
371   // Updates the ith set bit of this BitVector with the value of
372   // |other.IsSet(i)|.
373   //
374   // This is the best way to batch update all the bits which are set; for
375   // example when filtering rows, we want to filter all rows which are currently
376   // included but ignore rows which have already been excluded.
377   //
378   // For example suppose the following:
379   // this:  1 1 0 0 1 0 1
380   // other: 0 1 1 0
381   // This will change this to the following:
382   // this:  0 1 0 0 1 0 0
383   void UpdateSetBits(const BitVector& other);
384 
385   // For each set bit position  in |other|, Selects the value of each bit in
386   // |this| and stores them contiguously in |this|.
387   //
388   // Precondition: |this.size()| <= |other.size()|.
389   //
390   // For example suppose the following:
391   // this:  1 1 0 0 1 0 1
392   // other: 0 1 0 1 0 1 0 0 1 0
393   // |this| will change this to the following:
394   // this:  1 0 0
395   void SelectBits(const BitVector& other);
396 
397   // Returns the approximate cost (in bytes) of storing a BitVector with size
398   // |n|. This can be used to make decisions about whether using a BitVector is
399   // worthwhile.
400   // This cost should not be treated as exact - it just gives an indication of
401   // the memory needed.
ApproxBytesCost(uint32_t n)402   static constexpr uint32_t ApproxBytesCost(uint32_t n) {
403     // The two main things making up a BitVector is the cost of the blocks of
404     // bits and the cost of the counts vector.
405     return BlockCount(n) * Block::kBits + BlockCount(n) * sizeof(uint32_t);
406   }
407 
408   // Returns a vector<uint32_t> containing the indices of all the set bits
409   // in the BitVector.
410   std::vector<uint32_t> GetSetBitIndices() const;
411 
412   // Serialize internals of BitVector to proto.
413   void Serialize(protos::pbzero::SerializedColumn_BitVector* msg) const;
414 
415   // Deserialize BitVector from proto.
416   void Deserialize(
417       const protos::pbzero::SerializedColumn_BitVector_Decoder& bv_msg);
418 
419  private:
420   using SetBitsIterator = internal::SetBitsIterator;
421   friend class internal::BaseIterator;
422   friend class internal::SetBitsIterator;
423 
424   // Represents the offset of a bit within a block.
425   struct BlockOffset {
426     uint16_t word_idx;
427     uint16_t bit_idx;
428   };
429 
430   // Represents the address of a bit within the BitVector.
431   struct Address {
432     uint32_t block_idx;
433     BlockOffset block_offset;
434   };
435 
436   // Represents the smallest collection of bits we can refer to as
437   // one unit.
438   //
439   // Currently, this is implemented as a 64 bit integer as this is the
440   // largest type which we can assume to be present on all platforms.
441   class BitWord {
442    public:
443     static constexpr uint32_t kBits = 64;
444 
BitWord(uint64_t * word)445     explicit BitWord(uint64_t* word) : word_(word) {}
446 
447     // Bitwise ors the given |mask| to the current value.
Or(uint64_t mask)448     void Or(uint64_t mask) { *word_ |= mask; }
449 
450     // Bitwise ands the given |mask| to the current value.
And(uint64_t mask)451     void And(uint64_t mask) { *word_ &= mask; }
452 
453     // Bitwise not.
Not()454     void Not() { *word_ = ~(*word_); }
455 
456     // Sets the bit at the given index to true.
Set(uint32_t idx)457     void Set(uint32_t idx) {
458       PERFETTO_DCHECK(idx < kBits);
459 
460       // Or the value for the true shifted up to |idx| with the word.
461       Or(1ull << idx);
462     }
463 
464     // Sets the bit at the given index to false.
Clear(uint32_t idx)465     void Clear(uint32_t idx) {
466       PERFETTO_DCHECK(idx < kBits);
467 
468       // And the integer of all bits set apart from |idx| with the word.
469       *word_ &= ~(1ull << idx);
470     }
471 
472     // Clears all the bits (i.e. sets the atom to zero).
ClearAll()473     void ClearAll() { *word_ = 0; }
474 
475     // Retains all bits up to and including the bit at |idx| and clears
476     // all bits after this point.
ClearAfter(uint32_t idx)477     void ClearAfter(uint32_t idx) {
478       PERFETTO_DCHECK(idx < kBits);
479       *word_ = WordUntil(idx);
480     }
481 
482     // Sets all bits between the bit at |start| and |end| (inclusive).
Set(uint32_t start,uint32_t end)483     void Set(uint32_t start, uint32_t end) {
484       uint32_t diff = end - start;
485       *word_ |= (MaskAllBitsSetUntil(diff) << static_cast<uint64_t>(start));
486     }
487 
488     // Return a mask of all the bits up to and including bit at |idx|.
MaskAllBitsSetUntil(uint32_t idx)489     static uint64_t MaskAllBitsSetUntil(uint32_t idx) {
490       // Start with 1 and shift it up (idx + 1) bits we get:
491       // top : 00000000010000000
492       uint64_t top = 1ull << ((idx + 1ull) % kBits);
493 
494       // We need to handle the case where idx == 63. In this case |top| will be
495       // zero because 1 << ((idx + 1) % 64) == 1 << (64 % 64) == 1.
496       // In this case, we actually want top == 0. We can do this by shifting
497       // down by (idx + 1) / kBits - this will be a noop for every index other
498       // than idx == 63. This should also be free on x86 because of the mod
499       // instruction above.
500       top = top >> ((idx + 1) / kBits);
501 
502       // Then if we take away 1, we get precisely the mask we want.
503       return top - 1u;
504     }
505 
506    private:
507     // Returns the bits up to and including the bit at |idx|.
WordUntil(uint32_t idx)508     uint64_t WordUntil(uint32_t idx) const {
509       PERFETTO_DCHECK(idx < kBits);
510 
511       // To understand what is happeninng here, consider an example.
512       // Suppose we want to all the bits up to the 7th bit in the atom
513       //               7th
514       //                |
515       //                v
516       // atom: 01010101011111000
517       //
518       // The easiest way to do this would be if we had a mask with only
519       // the bottom 7 bits set:
520       // mask: 00000000001111111
521       uint64_t mask = MaskAllBitsSetUntil(idx);
522 
523       // Finish up by and'ing the atom with the computed mask.
524       return *word_ & mask;
525     }
526 
527     uint64_t* word_;
528   };
529 
530   class ConstBitWord {
531    public:
532     static constexpr uint32_t kBits = 64;
533 
ConstBitWord(const uint64_t * word)534     explicit ConstBitWord(const uint64_t* word) : word_(word) {}
535 
536     // Returns whether the bit at the given index is set.
IsSet(uint32_t idx)537     bool IsSet(uint32_t idx) const {
538       PERFETTO_DCHECK(idx < kBits);
539       return (*word_ >> idx) & 1ull;
540     }
541 
542     // Returns the index of the nth set bit.
543     // Undefined if |n| >= |CountSetBits()|.
IndexOfNthSet(uint32_t n)544     uint16_t IndexOfNthSet(uint32_t n) const {
545       PERFETTO_DCHECK(n < kBits);
546 
547       // The below code is very dense but essentially computes the nth set
548       // bit inside |atom| in the "broadword" style of programming (sometimes
549       // referred to as "SIMD within a register").
550       //
551       // Instead of treating a uint64 as an individual unit, broadword
552       // algorithms treat them as a packed vector of uint8. By doing this, they
553       // allow branchless algorithms when considering bits of a uint64.
554       //
555       // In benchmarks, this algorithm has found to be the fastest, portable
556       // way of computing the nth set bit (if we were only targetting new
557       // versions of x64, we could also use pdep + ctz but unfortunately
558       // this would fail on WASM - this about 2.5-3x faster on x64).
559       //
560       // The code below was taken from the paper
561       // http://vigna.di.unimi.it/ftp/papers/Broadword.pdf
562       uint64_t s = *word_ - ((*word_ & 0xAAAAAAAAAAAAAAAA) >> 1);
563       s = (s & 0x3333333333333333) + ((s >> 2) & 0x3333333333333333);
564       s = ((s + (s >> 4)) & 0x0F0F0F0F0F0F0F0F) * L8;
565 
566       uint64_t b = (BwLessThan(s, n * L8) >> 7) * L8 >> 53 & ~7ull;
567       uint64_t l = n - ((s << 8) >> b & 0xFF);
568       s = (BwGtZero(((*word_ >> b & 0xFF) * L8) & 0x8040201008040201) >> 7) *
569           L8;
570 
571       uint64_t ret = b + ((BwLessThan(s, l * L8) >> 7) * L8 >> 56);
572 
573       return static_cast<uint16_t>(ret);
574     }
575 
576     // Returns the number of set bits.
CountSetBits()577     uint32_t CountSetBits() const {
578       return static_cast<uint32_t>(PERFETTO_POPCOUNT(*word_));
579     }
580 
581     // Returns the number of set bits up to and including the bit at |idx|.
CountSetBits(uint32_t idx)582     uint32_t CountSetBits(uint32_t idx) const {
583       PERFETTO_DCHECK(idx < kBits);
584       return static_cast<uint32_t>(PERFETTO_POPCOUNT(WordUntil(idx)));
585     }
586 
587    private:
588     // Constant with all the low bit of every byte set.
589     static constexpr uint64_t L8 = 0x0101010101010101;
590 
591     // Constant with all the high bit of every byte set.
592     static constexpr uint64_t H8 = 0x8080808080808080;
593 
594     // Returns a packed uint64 encoding whether each byte of x is less
595     // than each corresponding byte of y.
596     // This is computed in the "broadword" style of programming; see
597     // IndexOfNthSet for details on this.
BwLessThan(uint64_t x,uint64_t y)598     static uint64_t BwLessThan(uint64_t x, uint64_t y) {
599       return (((y | H8) - (x & ~H8)) ^ x ^ y) & H8;
600     }
601 
602     // Returns a packed uint64 encoding whether each byte of x is greater
603     // than or equal zero.
604     // This is computed in the "broadword" style of programming; see
605     // IndexOfNthSet for details on this.
BwGtZero(uint64_t x)606     static uint64_t BwGtZero(uint64_t x) { return (((x | H8) - L8) | x) & H8; }
607 
608     // Returns the bits up to and including the bit at |idx|.
WordUntil(uint32_t idx)609     uint64_t WordUntil(uint32_t idx) const {
610       PERFETTO_DCHECK(idx < kBits);
611 
612       // To understand what is happeninng here, consider an example.
613       // Suppose we want to all the bits up to the 7th bit in the atom
614       //               7th
615       //                |
616       //                v
617       // atom: 01010101011111000
618       //
619       // The easiest way to do this would be if we had a mask with only
620       // the bottom 7 bits set:
621       // mask: 00000000001111111
622       uint64_t mask = BitWord::MaskAllBitsSetUntil(idx);
623 
624       // Finish up by and'ing the atom with the computed mask.
625       return *word_ & mask;
626     }
627 
628     const uint64_t* word_;
629   };
630 
631   // Represents a group of bits with a bitcount such that it is
632   // efficient to work on these bits.
633   //
634   // On x86 architectures we generally target for trace processor, the
635   // size of a cache line is 64 bytes (or 512 bits). For this reason,
636   // we make the size of the block contain 8 atoms as 8 * 64 == 512.
637   class Block {
638    public:
639     // See class documentation for how these constants are chosen.
640     static constexpr uint16_t kWords = 8;
641     static constexpr uint32_t kBits = kWords * BitWord::kBits;
642 
Block(uint64_t * start_word)643     explicit Block(uint64_t* start_word) : start_word_(start_word) {}
644 
645     // Sets the bit at the given address to true.
Set(const BlockOffset & addr)646     void Set(const BlockOffset& addr) {
647       PERFETTO_DCHECK(addr.word_idx < kWords);
648       BitWord(&start_word_[addr.word_idx]).Set(addr.bit_idx);
649     }
650 
651     // Sets the bit at the given address to false.
Clear(const BlockOffset & addr)652     void Clear(const BlockOffset& addr) {
653       PERFETTO_DCHECK(addr.word_idx < kWords);
654 
655       BitWord(&start_word_[addr.word_idx]).Clear(addr.bit_idx);
656     }
657 
658     // Retains all bits up to and including the bit at |addr| and clears
659     // all bits after this point.
ClearAfter(const BlockOffset & offset)660     void ClearAfter(const BlockOffset& offset) {
661       PERFETTO_DCHECK(offset.word_idx < kWords);
662 
663       // In the first atom, keep the bits until the address specified.
664       BitWord(&start_word_[offset.word_idx]).ClearAfter(offset.bit_idx);
665 
666       // For all subsequent atoms, we just clear the whole atom.
667       for (uint32_t i = offset.word_idx + 1; i < kWords; ++i) {
668         BitWord(&start_word_[i]).ClearAll();
669       }
670     }
671 
672     // Set all the bits between the offsets given by |start| and |end|
673     // (inclusive).
Set(const BlockOffset & start,const BlockOffset & end)674     void Set(const BlockOffset& start, const BlockOffset& end) {
675       if (start.word_idx == end.word_idx) {
676         // If there is only one word we will change, just set the range within
677         // the word.
678         BitWord(&start_word_[start.word_idx]).Set(start.bit_idx, end.bit_idx);
679         return;
680       }
681 
682       // Otherwise, we have more than one word to set. To do this, we will
683       // do this in three steps.
684 
685       // First, we set the first word from the start to the end of the word.
686       BitWord(&start_word_[start.word_idx])
687           .Set(start.bit_idx, BitWord::kBits - 1);
688 
689       // Next, we set all words (except the last).
690       for (uint32_t i = start.word_idx + 1; i < end.word_idx; ++i) {
691         BitWord(&start_word_[i]).Set(0, BitWord::kBits - 1);
692       }
693 
694       // Finally, we set the word block from the start to the end offset.
695       BitWord(&start_word_[end.word_idx]).Set(0, end.bit_idx);
696     }
697 
Or(Block & sec)698     void Or(Block& sec) {
699       for (uint32_t i = 0; i < kWords; ++i) {
700         BitWord(&start_word_[i]).Or(sec.start_word_[i]);
701       }
702     }
703 
704     template <typename Filler>
FromFiller(uint32_t offset,Filler f)705     void FromFiller(uint32_t offset, Filler f) {
706       // We choose to iterate the bits as the outer loop as this allows us
707       // to reuse the mask and the bit offset between iterations of the loop.
708       // This makes a small (but noticable) impact in the performance of this
709       // function.
710       for (uint32_t i = 0; i < BitWord::kBits; ++i) {
711         uint64_t mask = 1ull << i;
712         uint32_t offset_with_bit = offset + i;
713         for (uint32_t j = 0; j < Block::kWords; ++j) {
714           bool res = f(offset_with_bit + j * BitWord::kBits);
715           BitWord(&start_word_[j]).Or(res ? mask : 0);
716         }
717       }
718     }
719 
ReplaceWith(Block block)720     void ReplaceWith(Block block) {
721       for (uint32_t i = 0; i < BitWord::kBits; ++i) {
722         start_word_[i] = block.start_word()[i];
723       }
724     }
725 
start_word()726     uint64_t* start_word() { return start_word_; }
727 
728    private:
729     uint64_t* start_word_;
730   };
731 
732   class ConstBlock {
733    public:
734     // See class documentation for how these constants are chosen.
735     static constexpr uint16_t kWords = Block::kWords;
736     static constexpr uint32_t kBits = kWords * BitWord::kBits;
737 
ConstBlock(const uint64_t * start_word)738     explicit ConstBlock(const uint64_t* start_word) : start_word_(start_word) {}
ConstBlock(Block block)739     explicit ConstBlock(Block block) : start_word_(block.start_word()) {}
740 
741     // Returns whether the bit at the given address is set.
IsSet(const BlockOffset & addr)742     bool IsSet(const BlockOffset& addr) const {
743       PERFETTO_DCHECK(addr.word_idx < kWords);
744       return ConstBitWord(start_word_ + addr.word_idx).IsSet(addr.bit_idx);
745     }
746 
747     // Gets the offset of the nth set bit in this block.
IndexOfNthSet(uint32_t n)748     BlockOffset IndexOfNthSet(uint32_t n) const {
749       uint32_t count = 0;
750       for (uint16_t i = 0; i < kWords; ++i) {
751         // Keep a running count of all the set bits in the atom.
752         uint32_t value = count + ConstBitWord(start_word_ + i).CountSetBits();
753         if (value <= n) {
754           count = value;
755           continue;
756         }
757 
758         // The running count of set bits is more than |n|. That means this
759         // atom contains the bit we are looking for.
760 
761         // Take away the number of set bits to the start of this atom from
762         // |n|.
763         uint32_t set_in_atom = n - count;
764 
765         // Figure out the index of the set bit inside the atom and create the
766         // address of this bit from that.
767         uint16_t bit_idx =
768             ConstBitWord(start_word_ + i).IndexOfNthSet(set_in_atom);
769         PERFETTO_DCHECK(bit_idx < 64);
770         return BlockOffset{i, bit_idx};
771       }
772       PERFETTO_FATAL("Index out of bounds");
773     }
774 
775     // Gets the number of set bits within a block up to and including the bit
776     // at the given address.
CountSetBits(const BlockOffset & addr)777     uint32_t CountSetBits(const BlockOffset& addr) const {
778       PERFETTO_DCHECK(addr.word_idx < kWords);
779 
780       // Count all the set bits in the atom until we reach the last atom
781       // index.
782       uint32_t count = 0;
783       for (uint32_t i = 0; i < addr.word_idx; ++i) {
784         count += ConstBitWord(&start_word_[i]).CountSetBits();
785       }
786 
787       // For the last atom, only count the bits upto and including the bit
788       // index.
789       return count + ConstBitWord(&start_word_[addr.word_idx])
790                          .CountSetBits(addr.bit_idx);
791     }
792 
793     // Gets the number of set bits within a block up.
CountSetBits()794     uint32_t CountSetBits() const {
795       uint32_t count = 0;
796       for (uint32_t i = 0; i < kWords; ++i) {
797         count += ConstBitWord(&start_word_[i]).CountSetBits();
798       }
799       return count;
800     }
801 
802    private:
803     const uint64_t* start_word_;
804   };
805 
806   BitVector(std::vector<uint64_t> words,
807             std::vector<uint32_t> counts,
808             uint32_t size);
809 
810   // Returns the number of 8 elements blocks in the BitVector.
BlockCount()811   uint32_t BlockCount() {
812     return static_cast<uint32_t>(words_.size()) / Block::kWords;
813   }
814 
BlockFromIndex(uint32_t idx)815   Block BlockFromIndex(uint32_t idx) {
816     PERFETTO_DCHECK(Block::kWords * (idx + 1) <= words_.size());
817 
818     uint64_t* start_word = &words_[Block::kWords * idx];
819     return Block(start_word);
820   }
821 
ConstBlockFromIndex(uint32_t idx)822   ConstBlock ConstBlockFromIndex(uint32_t idx) const {
823     PERFETTO_DCHECK(Block::kWords * (idx + 1) <= words_.size());
824 
825     return ConstBlock(&words_[Block::kWords * idx]);
826   }
827 
828   // Set all the bits between the addresses given by |start| and |end|
829   // (inclusive).
830   // Note: this method does not update the counts vector - that is the
831   // responsibility of the caller.
Set(const Address & start,const Address & end)832   void Set(const Address& start, const Address& end) {
833     static constexpr BlockOffset kFirstBlockOffset = BlockOffset{0, 0};
834     static constexpr BlockOffset kLastBlockOffset =
835         BlockOffset{Block::kWords - 1, BitWord::kBits - 1};
836 
837     if (start.block_idx == end.block_idx) {
838       // If there is only one block we will change, just set the range within
839       // the block.
840       BlockFromIndex(start.block_idx).Set(start.block_offset, end.block_offset);
841       return;
842     }
843 
844     // Otherwise, we have more than one block to set. To do this, we will
845     // do this in three steps.
846 
847     // First, we set the first block from the start to the end of the block.
848     BlockFromIndex(start.block_idx).Set(start.block_offset, kLastBlockOffset);
849 
850     // Next, we set all blocks (except the last).
851     for (uint32_t cur_block_idx = start.block_idx + 1;
852          cur_block_idx < end.block_idx; ++cur_block_idx) {
853       BlockFromIndex(cur_block_idx).Set(kFirstBlockOffset, kLastBlockOffset);
854     }
855 
856     // Finally, we set the last block from the start to the end offset.
857     BlockFromIndex(end.block_idx).Set(kFirstBlockOffset, end.block_offset);
858   }
859 
860   // Helper function to append a bit. Generally, prefer to call AppendTrue
861   // or AppendFalse instead of this function if you know the type - they will
862   // be faster.
Append(bool value)863   void Append(bool value) {
864     if (value) {
865       AppendTrue();
866     } else {
867       AppendFalse();
868     }
869   }
870 
871   // Iterate all the set bits in the BitVector.
872   //
873   // Usage:
874   // for (auto it = bv.IterateSetBits(); it; it.Next()) {
875   //   ...
876   // }
877   SetBitsIterator IterateSetBits() const;
878 
879   // Returns the index of the word which would store |idx|.
WordFloor(uint32_t idx)880   static constexpr uint32_t WordFloor(uint32_t idx) {
881     return idx / BitWord::kBits;
882   }
883 
884   // Returns number of words (int64_t) required to store |bit_count| bits.
WordCount(uint32_t bit_count)885   static uint32_t WordCount(uint32_t bit_count) {
886     // See |BlockCount| for an explanation of this trick.
887     return (bit_count + BitWord::kBits - 1) / BitWord::kBits;
888   }
889 
IndexToAddress(uint32_t idx)890   static Address IndexToAddress(uint32_t idx) {
891     Address a;
892     a.block_idx = idx / Block::kBits;
893 
894     uint16_t bit_idx_inside_block = idx % Block::kBits;
895     a.block_offset.word_idx = bit_idx_inside_block / BitWord::kBits;
896     a.block_offset.bit_idx = bit_idx_inside_block % BitWord::kBits;
897     return a;
898   }
899 
AddressToIndex(Address addr)900   static uint32_t AddressToIndex(Address addr) {
901     return addr.block_idx * Block::kBits +
902            addr.block_offset.word_idx * BitWord::kBits +
903            addr.block_offset.bit_idx;
904   }
905 
906   // Returns number of blocks (arrays of 8 int64_t) required to store
907   // |bit_count| bits.
908   //
909   // This is useful to be able to find indices where "fast" algorithms can
910   // start which work on entire blocks.
BlockCount(uint32_t bit_count)911   static constexpr uint32_t BlockCount(uint32_t bit_count) {
912     // Adding |Block::kBits - 1| gives us a quick way to get the count. We
913     // do this instead of adding 1 at the end because that gives incorrect
914     // answers for bit_count % Block::kBits == 0.
915     return (bit_count + Block::kBits - 1) / Block::kBits;
916   }
917 
918   // Returns the index of the block which would store |idx|.
BlockFloor(uint32_t idx)919   static constexpr uint32_t BlockFloor(uint32_t idx) {
920     return idx / Block::kBits;
921   }
922 
923   // Converts a block index to a index in the BitVector.
BlockToIndex(uint32_t block_idx)924   static constexpr uint32_t BlockToIndex(uint32_t block_idx) {
925     return block_idx * Block::kBits;
926   }
927 
928   // Updates the counts in |counts| by counting the set bits in |words|.
UpdateCounts(const std::vector<uint64_t> & words,std::vector<uint32_t> & counts)929   static void UpdateCounts(const std::vector<uint64_t>& words,
930                            std::vector<uint32_t>& counts) {
931     PERFETTO_CHECK(words.size() == counts.size() * Block::kWords);
932     for (uint32_t i = 1; i < counts.size(); ++i) {
933       counts[i] = counts[i - 1] +
934                   ConstBlock(&words[Block::kWords * (i - 1)]).CountSetBits();
935     }
936   }
937 
938   uint32_t size_ = 0;
939   // See class documentation for how these constants are chosen.
940   static constexpr uint16_t kWordsInBlock = Block::kWords;
941   static constexpr uint32_t kBitsInBlock = kWordsInBlock * BitWord::kBits;
942   std::vector<uint32_t> counts_;
943   std::vector<uint64_t> words_;
944 };
945 
946 }  // namespace trace_processor
947 }  // namespace perfetto
948 
949 #endif  // SRC_TRACE_PROCESSOR_CONTAINERS_BIT_VECTOR_H_
950