1 /* 2 * Copyright (C) 2019 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef SRC_TRACE_PROCESSOR_CONTAINERS_BIT_VECTOR_H_ 18 #define SRC_TRACE_PROCESSOR_CONTAINERS_BIT_VECTOR_H_ 19 20 #include <algorithm> 21 #include <cstdint> 22 #include <initializer_list> 23 #include <iterator> 24 #include <utility> 25 #include <vector> 26 27 #include "perfetto/base/compiler.h" 28 #include "perfetto/base/logging.h" 29 #include "perfetto/public/compiler.h" 30 31 namespace perfetto { 32 namespace protos::pbzero { 33 class SerializedColumn_BitVector; 34 class SerializedColumn_BitVector_Decoder; 35 } // namespace protos::pbzero 36 37 namespace trace_processor { 38 namespace internal { 39 40 class BaseIterator; 41 class SetBitsIterator; 42 43 } // namespace internal 44 45 // A BitVector which compactly stores a vector of bools using a single bit 46 // for each bool. 47 class BitVector { 48 public: 49 static constexpr uint32_t kBitsInWord = 64; 50 51 // Builder class which allows efficiently creating a BitVector by appending 52 // words. Using this class is generally far more efficient than trying to set 53 // bits directly in a BitVector or even appending one bit at a time. 54 class Builder { 55 public: 56 // Creates a Builder for building a BitVector of |size| bits. 57 explicit Builder(uint32_t size, uint32_t skip = 0) words_(BlockCount (size)* Block::kWords)58 : words_(BlockCount(size) * Block::kWords), 59 global_bit_offset_(skip), 60 size_(size), 61 skipped_blocks_(skip / Block::kBits) { 62 PERFETTO_CHECK(global_bit_offset_ <= size_); 63 } 64 65 // Appends a single bit to the builder. 66 // Note: |AppendWord| is far more efficient than this method so should be 67 // preferred. Append(bool value)68 void Append(bool value) { 69 PERFETTO_DCHECK(global_bit_offset_ < size_); 70 71 words_[global_bit_offset_ / BitWord::kBits] |= 72 static_cast<uint64_t>(value) << global_bit_offset_ % BitWord::kBits; 73 global_bit_offset_++; 74 } 75 76 // Appends a whole word to the Builder. Builder has to end on a word 77 // boundary before calling this function. AppendWord(uint64_t word)78 void AppendWord(uint64_t word) { 79 PERFETTO_DCHECK(global_bit_offset_ % BitWord::kBits == 0); 80 PERFETTO_DCHECK(global_bit_offset_ + BitWord::kBits <= size_); 81 82 words_[global_bit_offset_ / BitWord::kBits] = word; 83 global_bit_offset_ += BitWord::kBits; 84 } 85 86 // Creates a BitVector from this Builder. Build()87 BitVector Build() && { 88 if (size_ == 0) 89 return {}; 90 91 std::vector<uint32_t> counts(BlockCount(size_)); 92 PERFETTO_CHECK(skipped_blocks_ <= counts.size()); 93 for (uint32_t i = skipped_blocks_ + 1; i < counts.size(); ++i) { 94 counts[i] = counts[i - 1] + 95 ConstBlock(&words_[Block::kWords * (i - 1)]).CountSetBits(); 96 } 97 return {std::move(words_), std::move(counts), size_}; 98 } 99 100 // Returns the number of bits which are in complete words which can be 101 // appended to this builder before having to fallback to |Append| due to 102 // being close to the end. BitsInCompleteWordsUntilFull()103 uint32_t BitsInCompleteWordsUntilFull() const { 104 uint32_t next_word = WordCount(global_bit_offset_); 105 uint32_t end_word = WordFloor(size_); 106 uint32_t complete_words = next_word < end_word ? end_word - next_word : 0; 107 return complete_words * BitWord::kBits; 108 } 109 110 // Returns the number of bits which should be appended using |Append| either 111 // hitting a word boundary (and thus able to use |AppendWord|) or until the 112 // BitVector is full (i.e. no more Appends should happen), whichever would 113 // happen first. BitsUntilWordBoundaryOrFull()114 uint32_t BitsUntilWordBoundaryOrFull() const { 115 if (global_bit_offset_ == 0 && size_ < BitWord::kBits) { 116 return size_; 117 } 118 uint8_t word_bit_offset = global_bit_offset_ % BitWord::kBits; 119 return std::min(BitsUntilFull(), 120 (BitWord::kBits - word_bit_offset) % BitWord::kBits); 121 } 122 123 // Returns the number of bits which should be appended using |Append| before 124 // hitting a word boundary (and thus able to use |AppendWord|) or until the 125 // BitVector is full (i.e. no more Appends should happen). BitsUntilFull()126 uint32_t BitsUntilFull() const { return size_ - global_bit_offset_; } 127 128 private: 129 std::vector<uint64_t> words_; 130 uint32_t global_bit_offset_ = 0; 131 uint32_t size_ = 0; 132 uint32_t skipped_blocks_ = 0; 133 }; 134 135 // Creates an empty BitVector. 136 BitVector(); 137 138 BitVector(std::initializer_list<bool> init); 139 140 // Creates a BitVector of |count| size filled with |value|. 141 explicit BitVector(uint32_t count, bool value = false); 142 143 BitVector(const BitVector&) = delete; 144 BitVector& operator=(const BitVector&) = delete; 145 146 // Enable moving BitVectors as they have no unmovable state. 147 BitVector(BitVector&&) noexcept = default; 148 BitVector& operator=(BitVector&&) = default; 149 150 // Create a copy of the BitVector. 151 BitVector Copy() const; 152 153 // Bitwise Not of the BitVector. 154 void Not(); 155 156 // Bitwise Or of the BitVector. 157 void Or(const BitVector&); 158 159 // Bitwise And of the BitVector. 160 void And(const BitVector&); 161 162 // Returns the size of the BitVector. size()163 uint32_t size() const { return static_cast<uint32_t>(size_); } 164 165 // Returns whether the bit at |idx| is set. IsSet(uint32_t idx)166 bool IsSet(uint32_t idx) const { 167 PERFETTO_DCHECK(idx < size()); 168 return ConstBitWord(&words_[WordFloor(idx)]).IsSet(idx % BitWord::kBits); 169 } 170 171 // Returns the number of set bits in the BitVector. CountSetBits()172 uint32_t CountSetBits() const { return CountSetBits(size()); } 173 174 // Returns the number of set bits between the start of the BitVector 175 // (inclusive) and the index |end| (exclusive). CountSetBits(uint32_t end)176 uint32_t CountSetBits(uint32_t end) const { 177 if (end == 0) 178 return 0; 179 180 // Although the external interface we present uses an exclusive |end|, 181 // internally it's a lot nicer to work with an inclusive |end| (mainly 182 // because we get block rollovers on exclusive ends which means we need 183 // to have if checks to ensure we don't overflow the number of blocks). 184 Address addr = IndexToAddress(end - 1); 185 186 // Add the number of set bits until the start of the block to the number 187 // of set bits until the end address inside the block. 188 return counts_[addr.block_idx] + 189 ConstBlockFromIndex(addr.block_idx).CountSetBits(addr.block_offset); 190 } 191 192 // Returns the index of the |n|th set bit. Should only be called with |n| < 193 // CountSetBits(). IndexOfNthSet(uint32_t n)194 uint32_t IndexOfNthSet(uint32_t n) const { 195 PERFETTO_DCHECK(n < CountSetBits()); 196 197 // First search for the block which, up until the start of it, has more than 198 // n bits set. Note that this should never return |counts.begin()| as 199 // that should always be 0. 200 // TODO(lalitm): investigate whether we can make this faster with small 201 // binary search followed by a linear search instead of binary searching the 202 // full way. 203 auto it = std::upper_bound(counts_.begin(), counts_.end(), n); 204 PERFETTO_DCHECK(it != counts_.begin()); 205 206 // Go back one block to find the block which has the bit we are looking for. 207 uint32_t block_idx = 208 static_cast<uint32_t>(std::distance(counts_.begin(), it) - 1); 209 210 // Figure out how many set bits forward we are looking inside the block 211 // by taking away the number of bits at the start of the block from n. 212 uint32_t set_in_block = n - counts_[block_idx]; 213 214 // Compute the address of the bit in the block then convert the full 215 // address back to an index. 216 BlockOffset block_offset = 217 ConstBlockFromIndex(block_idx).IndexOfNthSet(set_in_block); 218 return AddressToIndex(Address{block_idx, block_offset}); 219 } 220 221 // Sets the bit at index |idx| to true and returns the previous value. Set(uint32_t idx)222 bool Set(uint32_t idx) { 223 // Set the bit to the correct value inside the block but store the old 224 // bit to help fix the counts. 225 auto addr = IndexToAddress(idx); 226 bool old_value = 227 ConstBlockFromIndex(addr.block_idx).IsSet(addr.block_offset); 228 229 // If the old value was unset, set the bit and add one to the count. 230 if (PERFETTO_LIKELY(!old_value)) { 231 BlockFromIndex(addr.block_idx).Set(addr.block_offset); 232 233 auto size = static_cast<uint32_t>(counts_.size()); 234 for (uint32_t i = addr.block_idx + 1; i < size; ++i) { 235 counts_[i]++; 236 } 237 } 238 return old_value; 239 } 240 241 // Sets the bit at index |idx| to false. Clear(uint32_t idx)242 void Clear(uint32_t idx) { 243 // Set the bit to the correct value inside the block but store the old 244 // bit to help fix the counts. 245 auto addr = IndexToAddress(idx); 246 bool old_value = 247 ConstBlockFromIndex(addr.block_idx).IsSet(addr.block_offset); 248 249 // If the old value was set, clear the bit and subtract one from all the 250 // counts. 251 if (PERFETTO_LIKELY(old_value)) { 252 BlockFromIndex(addr.block_idx).Clear(addr.block_offset); 253 254 auto size = static_cast<uint32_t>(counts_.size()); 255 for (uint32_t i = addr.block_idx + 1; i < size; ++i) { 256 counts_[i]--; 257 } 258 } 259 } 260 261 // Appends true to the BitVector. AppendTrue()262 void AppendTrue() { 263 AppendFalse(); 264 Address addr = IndexToAddress(size() - 1); 265 BlockFromIndex(addr.block_idx).Set(addr.block_offset); 266 } 267 268 // Appends false to the BitVector. AppendFalse()269 void AppendFalse() { 270 Address addr = IndexToAddress(size_); 271 uint32_t old_blocks_size = BlockCount(); 272 uint32_t new_blocks_size = addr.block_idx + 1; 273 274 if (PERFETTO_UNLIKELY(new_blocks_size > old_blocks_size)) { 275 uint32_t t = CountSetBits(); 276 words_.resize(words_.size() + Block::kWords); 277 counts_.emplace_back(t); 278 } 279 280 size_++; 281 // We don't need to clear the bit as we ensure that anything after 282 // size_ is always set to false. 283 } 284 285 // Resizes the BitVector to the given |size|. 286 // Truncates the BitVector if |size| < |size()| or fills the new space with 287 // |filler| if |size| > |size()|. Calling this method is a noop if |size| == 288 // |size()|. 289 void Resize(uint32_t new_size, bool filler = false); 290 291 // Creates a BitVector of size |end| with the bits between |start| and |end| 292 // filled by calling the filler function |f(index of bit)|. 293 // 294 // As an example, suppose RangeForTesting(3, 7, [](x) { return x < 5 }). This 295 // would result in the following BitVector: [0 0 0 1 1 0 0] 296 template <typename Filler = bool(uint32_t)> RangeForTesting(uint32_t start,uint32_t end,Filler f)297 PERFETTO_WARN_UNUSED_RESULT static BitVector RangeForTesting(uint32_t start, 298 uint32_t end, 299 Filler f) { 300 // Compute the block index and BitVector index where we start and end 301 // working one block at a time. 302 uint32_t start_fast_block = BlockCount(start); 303 uint32_t start_fast_idx = BlockToIndex(start_fast_block); 304 BitVector bv(start, false); 305 306 // Minimum value of start_fast_idx is numer of bits in block, so we need to 307 // seperate calculation for shorter ranges. 308 if (start_fast_idx > end) { 309 for (uint32_t i = start; i < end; ++i) { 310 bv.Append(f(i)); 311 } 312 return bv; 313 } 314 315 uint32_t end_fast_block = BlockFloor(end); 316 uint32_t end_fast_idx = BlockToIndex(end_fast_block); 317 318 // Fill up to |start_fast_index| with values from the filler. 319 for (uint32_t i = start; i < start_fast_idx; ++i) { 320 bv.Append(f(i)); 321 } 322 323 // Assert words_ vector is full and size_ is properly calculated. 324 PERFETTO_DCHECK(bv.words_.size() % Block::kWords == 0); 325 PERFETTO_DCHECK(bv.words_.size() * BitWord::kBits == bv.size_); 326 327 // At this point we can work one block at a time. 328 bv.words_.resize(bv.words_.size() + 329 Block::kWords * (end_fast_block - start_fast_block)); 330 for (uint32_t i = start_fast_block; i < end_fast_block; ++i) { 331 uint64_t* block_start_word = &bv.words_[i * Block::kWords]; 332 Block(block_start_word).FromFiller(bv.size_, f); 333 bv.counts_.emplace_back(bv.CountSetBits()); 334 bv.size_ += Block::kBits; 335 } 336 337 // Add the last few elements to finish up to |end|. 338 for (uint32_t i = end_fast_idx; i < end; ++i) { 339 bv.Append(f(i)); 340 } 341 342 return bv; 343 } 344 345 // Creates BitVector from a vector of sorted indices. Set bits in the 346 // resulting BitVector are values from the index vector. 347 // Note for callers - the passed index vector has to: 348 // - be sorted 349 // - have first element >= 0 350 // - last value smaller than numeric limit of uint32_t. 351 PERFETTO_WARN_UNUSED_RESULT static BitVector FromSortedIndexVector( 352 const std::vector<int64_t>&); 353 354 // Creates BitVector from a vector of unsorted indices. Set bits in the 355 // resulting BitVector are values from the index vector. 356 PERFETTO_WARN_UNUSED_RESULT static BitVector FromUnsortedIndexVector( 357 const std::vector<uint32_t>&); 358 359 // Creates a BitVector of size `min(range_end, size())` with bits between 360 // |start| and |end| filled with corresponding bits from |this| BitVector. 361 PERFETTO_WARN_UNUSED_RESULT BitVector 362 IntersectRange(uint32_t range_start, uint32_t range_end) const; 363 364 // Requests the removal of unused capacity. 365 // Matches the semantics of std::vector::shrink_to_fit. ShrinkToFit()366 void ShrinkToFit() { 367 words_.shrink_to_fit(); 368 counts_.shrink_to_fit(); 369 } 370 371 // Updates the ith set bit of this BitVector with the value of 372 // |other.IsSet(i)|. 373 // 374 // This is the best way to batch update all the bits which are set; for 375 // example when filtering rows, we want to filter all rows which are currently 376 // included but ignore rows which have already been excluded. 377 // 378 // For example suppose the following: 379 // this: 1 1 0 0 1 0 1 380 // other: 0 1 1 0 381 // This will change this to the following: 382 // this: 0 1 0 0 1 0 0 383 void UpdateSetBits(const BitVector& other); 384 385 // For each set bit position in |other|, Selects the value of each bit in 386 // |this| and stores them contiguously in |this|. 387 // 388 // Precondition: |this.size()| <= |other.size()|. 389 // 390 // For example suppose the following: 391 // this: 1 1 0 0 1 0 1 392 // other: 0 1 0 1 0 1 0 0 1 0 393 // |this| will change this to the following: 394 // this: 1 0 0 395 void SelectBits(const BitVector& other); 396 397 // Returns the approximate cost (in bytes) of storing a BitVector with size 398 // |n|. This can be used to make decisions about whether using a BitVector is 399 // worthwhile. 400 // This cost should not be treated as exact - it just gives an indication of 401 // the memory needed. ApproxBytesCost(uint32_t n)402 static constexpr uint32_t ApproxBytesCost(uint32_t n) { 403 // The two main things making up a BitVector is the cost of the blocks of 404 // bits and the cost of the counts vector. 405 return BlockCount(n) * Block::kBits + BlockCount(n) * sizeof(uint32_t); 406 } 407 408 // Returns a vector<uint32_t> containing the indices of all the set bits 409 // in the BitVector. 410 std::vector<uint32_t> GetSetBitIndices() const; 411 412 // Serialize internals of BitVector to proto. 413 void Serialize(protos::pbzero::SerializedColumn_BitVector* msg) const; 414 415 // Deserialize BitVector from proto. 416 void Deserialize( 417 const protos::pbzero::SerializedColumn_BitVector_Decoder& bv_msg); 418 419 private: 420 using SetBitsIterator = internal::SetBitsIterator; 421 friend class internal::BaseIterator; 422 friend class internal::SetBitsIterator; 423 424 // Represents the offset of a bit within a block. 425 struct BlockOffset { 426 uint16_t word_idx; 427 uint16_t bit_idx; 428 }; 429 430 // Represents the address of a bit within the BitVector. 431 struct Address { 432 uint32_t block_idx; 433 BlockOffset block_offset; 434 }; 435 436 // Represents the smallest collection of bits we can refer to as 437 // one unit. 438 // 439 // Currently, this is implemented as a 64 bit integer as this is the 440 // largest type which we can assume to be present on all platforms. 441 class BitWord { 442 public: 443 static constexpr uint32_t kBits = 64; 444 BitWord(uint64_t * word)445 explicit BitWord(uint64_t* word) : word_(word) {} 446 447 // Bitwise ors the given |mask| to the current value. Or(uint64_t mask)448 void Or(uint64_t mask) { *word_ |= mask; } 449 450 // Bitwise ands the given |mask| to the current value. And(uint64_t mask)451 void And(uint64_t mask) { *word_ &= mask; } 452 453 // Bitwise not. Not()454 void Not() { *word_ = ~(*word_); } 455 456 // Sets the bit at the given index to true. Set(uint32_t idx)457 void Set(uint32_t idx) { 458 PERFETTO_DCHECK(idx < kBits); 459 460 // Or the value for the true shifted up to |idx| with the word. 461 Or(1ull << idx); 462 } 463 464 // Sets the bit at the given index to false. Clear(uint32_t idx)465 void Clear(uint32_t idx) { 466 PERFETTO_DCHECK(idx < kBits); 467 468 // And the integer of all bits set apart from |idx| with the word. 469 *word_ &= ~(1ull << idx); 470 } 471 472 // Clears all the bits (i.e. sets the atom to zero). ClearAll()473 void ClearAll() { *word_ = 0; } 474 475 // Retains all bits up to and including the bit at |idx| and clears 476 // all bits after this point. ClearAfter(uint32_t idx)477 void ClearAfter(uint32_t idx) { 478 PERFETTO_DCHECK(idx < kBits); 479 *word_ = WordUntil(idx); 480 } 481 482 // Sets all bits between the bit at |start| and |end| (inclusive). Set(uint32_t start,uint32_t end)483 void Set(uint32_t start, uint32_t end) { 484 uint32_t diff = end - start; 485 *word_ |= (MaskAllBitsSetUntil(diff) << static_cast<uint64_t>(start)); 486 } 487 488 // Return a mask of all the bits up to and including bit at |idx|. MaskAllBitsSetUntil(uint32_t idx)489 static uint64_t MaskAllBitsSetUntil(uint32_t idx) { 490 // Start with 1 and shift it up (idx + 1) bits we get: 491 // top : 00000000010000000 492 uint64_t top = 1ull << ((idx + 1ull) % kBits); 493 494 // We need to handle the case where idx == 63. In this case |top| will be 495 // zero because 1 << ((idx + 1) % 64) == 1 << (64 % 64) == 1. 496 // In this case, we actually want top == 0. We can do this by shifting 497 // down by (idx + 1) / kBits - this will be a noop for every index other 498 // than idx == 63. This should also be free on x86 because of the mod 499 // instruction above. 500 top = top >> ((idx + 1) / kBits); 501 502 // Then if we take away 1, we get precisely the mask we want. 503 return top - 1u; 504 } 505 506 private: 507 // Returns the bits up to and including the bit at |idx|. WordUntil(uint32_t idx)508 uint64_t WordUntil(uint32_t idx) const { 509 PERFETTO_DCHECK(idx < kBits); 510 511 // To understand what is happeninng here, consider an example. 512 // Suppose we want to all the bits up to the 7th bit in the atom 513 // 7th 514 // | 515 // v 516 // atom: 01010101011111000 517 // 518 // The easiest way to do this would be if we had a mask with only 519 // the bottom 7 bits set: 520 // mask: 00000000001111111 521 uint64_t mask = MaskAllBitsSetUntil(idx); 522 523 // Finish up by and'ing the atom with the computed mask. 524 return *word_ & mask; 525 } 526 527 uint64_t* word_; 528 }; 529 530 class ConstBitWord { 531 public: 532 static constexpr uint32_t kBits = 64; 533 ConstBitWord(const uint64_t * word)534 explicit ConstBitWord(const uint64_t* word) : word_(word) {} 535 536 // Returns whether the bit at the given index is set. IsSet(uint32_t idx)537 bool IsSet(uint32_t idx) const { 538 PERFETTO_DCHECK(idx < kBits); 539 return (*word_ >> idx) & 1ull; 540 } 541 542 // Returns the index of the nth set bit. 543 // Undefined if |n| >= |CountSetBits()|. IndexOfNthSet(uint32_t n)544 uint16_t IndexOfNthSet(uint32_t n) const { 545 PERFETTO_DCHECK(n < kBits); 546 547 // The below code is very dense but essentially computes the nth set 548 // bit inside |atom| in the "broadword" style of programming (sometimes 549 // referred to as "SIMD within a register"). 550 // 551 // Instead of treating a uint64 as an individual unit, broadword 552 // algorithms treat them as a packed vector of uint8. By doing this, they 553 // allow branchless algorithms when considering bits of a uint64. 554 // 555 // In benchmarks, this algorithm has found to be the fastest, portable 556 // way of computing the nth set bit (if we were only targetting new 557 // versions of x64, we could also use pdep + ctz but unfortunately 558 // this would fail on WASM - this about 2.5-3x faster on x64). 559 // 560 // The code below was taken from the paper 561 // http://vigna.di.unimi.it/ftp/papers/Broadword.pdf 562 uint64_t s = *word_ - ((*word_ & 0xAAAAAAAAAAAAAAAA) >> 1); 563 s = (s & 0x3333333333333333) + ((s >> 2) & 0x3333333333333333); 564 s = ((s + (s >> 4)) & 0x0F0F0F0F0F0F0F0F) * L8; 565 566 uint64_t b = (BwLessThan(s, n * L8) >> 7) * L8 >> 53 & ~7ull; 567 uint64_t l = n - ((s << 8) >> b & 0xFF); 568 s = (BwGtZero(((*word_ >> b & 0xFF) * L8) & 0x8040201008040201) >> 7) * 569 L8; 570 571 uint64_t ret = b + ((BwLessThan(s, l * L8) >> 7) * L8 >> 56); 572 573 return static_cast<uint16_t>(ret); 574 } 575 576 // Returns the number of set bits. CountSetBits()577 uint32_t CountSetBits() const { 578 return static_cast<uint32_t>(PERFETTO_POPCOUNT(*word_)); 579 } 580 581 // Returns the number of set bits up to and including the bit at |idx|. CountSetBits(uint32_t idx)582 uint32_t CountSetBits(uint32_t idx) const { 583 PERFETTO_DCHECK(idx < kBits); 584 return static_cast<uint32_t>(PERFETTO_POPCOUNT(WordUntil(idx))); 585 } 586 587 private: 588 // Constant with all the low bit of every byte set. 589 static constexpr uint64_t L8 = 0x0101010101010101; 590 591 // Constant with all the high bit of every byte set. 592 static constexpr uint64_t H8 = 0x8080808080808080; 593 594 // Returns a packed uint64 encoding whether each byte of x is less 595 // than each corresponding byte of y. 596 // This is computed in the "broadword" style of programming; see 597 // IndexOfNthSet for details on this. BwLessThan(uint64_t x,uint64_t y)598 static uint64_t BwLessThan(uint64_t x, uint64_t y) { 599 return (((y | H8) - (x & ~H8)) ^ x ^ y) & H8; 600 } 601 602 // Returns a packed uint64 encoding whether each byte of x is greater 603 // than or equal zero. 604 // This is computed in the "broadword" style of programming; see 605 // IndexOfNthSet for details on this. BwGtZero(uint64_t x)606 static uint64_t BwGtZero(uint64_t x) { return (((x | H8) - L8) | x) & H8; } 607 608 // Returns the bits up to and including the bit at |idx|. WordUntil(uint32_t idx)609 uint64_t WordUntil(uint32_t idx) const { 610 PERFETTO_DCHECK(idx < kBits); 611 612 // To understand what is happeninng here, consider an example. 613 // Suppose we want to all the bits up to the 7th bit in the atom 614 // 7th 615 // | 616 // v 617 // atom: 01010101011111000 618 // 619 // The easiest way to do this would be if we had a mask with only 620 // the bottom 7 bits set: 621 // mask: 00000000001111111 622 uint64_t mask = BitWord::MaskAllBitsSetUntil(idx); 623 624 // Finish up by and'ing the atom with the computed mask. 625 return *word_ & mask; 626 } 627 628 const uint64_t* word_; 629 }; 630 631 // Represents a group of bits with a bitcount such that it is 632 // efficient to work on these bits. 633 // 634 // On x86 architectures we generally target for trace processor, the 635 // size of a cache line is 64 bytes (or 512 bits). For this reason, 636 // we make the size of the block contain 8 atoms as 8 * 64 == 512. 637 class Block { 638 public: 639 // See class documentation for how these constants are chosen. 640 static constexpr uint16_t kWords = 8; 641 static constexpr uint32_t kBits = kWords * BitWord::kBits; 642 Block(uint64_t * start_word)643 explicit Block(uint64_t* start_word) : start_word_(start_word) {} 644 645 // Sets the bit at the given address to true. Set(const BlockOffset & addr)646 void Set(const BlockOffset& addr) { 647 PERFETTO_DCHECK(addr.word_idx < kWords); 648 BitWord(&start_word_[addr.word_idx]).Set(addr.bit_idx); 649 } 650 651 // Sets the bit at the given address to false. Clear(const BlockOffset & addr)652 void Clear(const BlockOffset& addr) { 653 PERFETTO_DCHECK(addr.word_idx < kWords); 654 655 BitWord(&start_word_[addr.word_idx]).Clear(addr.bit_idx); 656 } 657 658 // Retains all bits up to and including the bit at |addr| and clears 659 // all bits after this point. ClearAfter(const BlockOffset & offset)660 void ClearAfter(const BlockOffset& offset) { 661 PERFETTO_DCHECK(offset.word_idx < kWords); 662 663 // In the first atom, keep the bits until the address specified. 664 BitWord(&start_word_[offset.word_idx]).ClearAfter(offset.bit_idx); 665 666 // For all subsequent atoms, we just clear the whole atom. 667 for (uint32_t i = offset.word_idx + 1; i < kWords; ++i) { 668 BitWord(&start_word_[i]).ClearAll(); 669 } 670 } 671 672 // Set all the bits between the offsets given by |start| and |end| 673 // (inclusive). Set(const BlockOffset & start,const BlockOffset & end)674 void Set(const BlockOffset& start, const BlockOffset& end) { 675 if (start.word_idx == end.word_idx) { 676 // If there is only one word we will change, just set the range within 677 // the word. 678 BitWord(&start_word_[start.word_idx]).Set(start.bit_idx, end.bit_idx); 679 return; 680 } 681 682 // Otherwise, we have more than one word to set. To do this, we will 683 // do this in three steps. 684 685 // First, we set the first word from the start to the end of the word. 686 BitWord(&start_word_[start.word_idx]) 687 .Set(start.bit_idx, BitWord::kBits - 1); 688 689 // Next, we set all words (except the last). 690 for (uint32_t i = start.word_idx + 1; i < end.word_idx; ++i) { 691 BitWord(&start_word_[i]).Set(0, BitWord::kBits - 1); 692 } 693 694 // Finally, we set the word block from the start to the end offset. 695 BitWord(&start_word_[end.word_idx]).Set(0, end.bit_idx); 696 } 697 Or(Block & sec)698 void Or(Block& sec) { 699 for (uint32_t i = 0; i < kWords; ++i) { 700 BitWord(&start_word_[i]).Or(sec.start_word_[i]); 701 } 702 } 703 704 template <typename Filler> FromFiller(uint32_t offset,Filler f)705 void FromFiller(uint32_t offset, Filler f) { 706 // We choose to iterate the bits as the outer loop as this allows us 707 // to reuse the mask and the bit offset between iterations of the loop. 708 // This makes a small (but noticable) impact in the performance of this 709 // function. 710 for (uint32_t i = 0; i < BitWord::kBits; ++i) { 711 uint64_t mask = 1ull << i; 712 uint32_t offset_with_bit = offset + i; 713 for (uint32_t j = 0; j < Block::kWords; ++j) { 714 bool res = f(offset_with_bit + j * BitWord::kBits); 715 BitWord(&start_word_[j]).Or(res ? mask : 0); 716 } 717 } 718 } 719 ReplaceWith(Block block)720 void ReplaceWith(Block block) { 721 for (uint32_t i = 0; i < BitWord::kBits; ++i) { 722 start_word_[i] = block.start_word()[i]; 723 } 724 } 725 start_word()726 uint64_t* start_word() { return start_word_; } 727 728 private: 729 uint64_t* start_word_; 730 }; 731 732 class ConstBlock { 733 public: 734 // See class documentation for how these constants are chosen. 735 static constexpr uint16_t kWords = Block::kWords; 736 static constexpr uint32_t kBits = kWords * BitWord::kBits; 737 ConstBlock(const uint64_t * start_word)738 explicit ConstBlock(const uint64_t* start_word) : start_word_(start_word) {} ConstBlock(Block block)739 explicit ConstBlock(Block block) : start_word_(block.start_word()) {} 740 741 // Returns whether the bit at the given address is set. IsSet(const BlockOffset & addr)742 bool IsSet(const BlockOffset& addr) const { 743 PERFETTO_DCHECK(addr.word_idx < kWords); 744 return ConstBitWord(start_word_ + addr.word_idx).IsSet(addr.bit_idx); 745 } 746 747 // Gets the offset of the nth set bit in this block. IndexOfNthSet(uint32_t n)748 BlockOffset IndexOfNthSet(uint32_t n) const { 749 uint32_t count = 0; 750 for (uint16_t i = 0; i < kWords; ++i) { 751 // Keep a running count of all the set bits in the atom. 752 uint32_t value = count + ConstBitWord(start_word_ + i).CountSetBits(); 753 if (value <= n) { 754 count = value; 755 continue; 756 } 757 758 // The running count of set bits is more than |n|. That means this 759 // atom contains the bit we are looking for. 760 761 // Take away the number of set bits to the start of this atom from 762 // |n|. 763 uint32_t set_in_atom = n - count; 764 765 // Figure out the index of the set bit inside the atom and create the 766 // address of this bit from that. 767 uint16_t bit_idx = 768 ConstBitWord(start_word_ + i).IndexOfNthSet(set_in_atom); 769 PERFETTO_DCHECK(bit_idx < 64); 770 return BlockOffset{i, bit_idx}; 771 } 772 PERFETTO_FATAL("Index out of bounds"); 773 } 774 775 // Gets the number of set bits within a block up to and including the bit 776 // at the given address. CountSetBits(const BlockOffset & addr)777 uint32_t CountSetBits(const BlockOffset& addr) const { 778 PERFETTO_DCHECK(addr.word_idx < kWords); 779 780 // Count all the set bits in the atom until we reach the last atom 781 // index. 782 uint32_t count = 0; 783 for (uint32_t i = 0; i < addr.word_idx; ++i) { 784 count += ConstBitWord(&start_word_[i]).CountSetBits(); 785 } 786 787 // For the last atom, only count the bits upto and including the bit 788 // index. 789 return count + ConstBitWord(&start_word_[addr.word_idx]) 790 .CountSetBits(addr.bit_idx); 791 } 792 793 // Gets the number of set bits within a block up. CountSetBits()794 uint32_t CountSetBits() const { 795 uint32_t count = 0; 796 for (uint32_t i = 0; i < kWords; ++i) { 797 count += ConstBitWord(&start_word_[i]).CountSetBits(); 798 } 799 return count; 800 } 801 802 private: 803 const uint64_t* start_word_; 804 }; 805 806 BitVector(std::vector<uint64_t> words, 807 std::vector<uint32_t> counts, 808 uint32_t size); 809 810 // Returns the number of 8 elements blocks in the BitVector. BlockCount()811 uint32_t BlockCount() { 812 return static_cast<uint32_t>(words_.size()) / Block::kWords; 813 } 814 BlockFromIndex(uint32_t idx)815 Block BlockFromIndex(uint32_t idx) { 816 PERFETTO_DCHECK(Block::kWords * (idx + 1) <= words_.size()); 817 818 uint64_t* start_word = &words_[Block::kWords * idx]; 819 return Block(start_word); 820 } 821 ConstBlockFromIndex(uint32_t idx)822 ConstBlock ConstBlockFromIndex(uint32_t idx) const { 823 PERFETTO_DCHECK(Block::kWords * (idx + 1) <= words_.size()); 824 825 return ConstBlock(&words_[Block::kWords * idx]); 826 } 827 828 // Set all the bits between the addresses given by |start| and |end| 829 // (inclusive). 830 // Note: this method does not update the counts vector - that is the 831 // responsibility of the caller. Set(const Address & start,const Address & end)832 void Set(const Address& start, const Address& end) { 833 static constexpr BlockOffset kFirstBlockOffset = BlockOffset{0, 0}; 834 static constexpr BlockOffset kLastBlockOffset = 835 BlockOffset{Block::kWords - 1, BitWord::kBits - 1}; 836 837 if (start.block_idx == end.block_idx) { 838 // If there is only one block we will change, just set the range within 839 // the block. 840 BlockFromIndex(start.block_idx).Set(start.block_offset, end.block_offset); 841 return; 842 } 843 844 // Otherwise, we have more than one block to set. To do this, we will 845 // do this in three steps. 846 847 // First, we set the first block from the start to the end of the block. 848 BlockFromIndex(start.block_idx).Set(start.block_offset, kLastBlockOffset); 849 850 // Next, we set all blocks (except the last). 851 for (uint32_t cur_block_idx = start.block_idx + 1; 852 cur_block_idx < end.block_idx; ++cur_block_idx) { 853 BlockFromIndex(cur_block_idx).Set(kFirstBlockOffset, kLastBlockOffset); 854 } 855 856 // Finally, we set the last block from the start to the end offset. 857 BlockFromIndex(end.block_idx).Set(kFirstBlockOffset, end.block_offset); 858 } 859 860 // Helper function to append a bit. Generally, prefer to call AppendTrue 861 // or AppendFalse instead of this function if you know the type - they will 862 // be faster. Append(bool value)863 void Append(bool value) { 864 if (value) { 865 AppendTrue(); 866 } else { 867 AppendFalse(); 868 } 869 } 870 871 // Iterate all the set bits in the BitVector. 872 // 873 // Usage: 874 // for (auto it = bv.IterateSetBits(); it; it.Next()) { 875 // ... 876 // } 877 SetBitsIterator IterateSetBits() const; 878 879 // Returns the index of the word which would store |idx|. WordFloor(uint32_t idx)880 static constexpr uint32_t WordFloor(uint32_t idx) { 881 return idx / BitWord::kBits; 882 } 883 884 // Returns number of words (int64_t) required to store |bit_count| bits. WordCount(uint32_t bit_count)885 static uint32_t WordCount(uint32_t bit_count) { 886 // See |BlockCount| for an explanation of this trick. 887 return (bit_count + BitWord::kBits - 1) / BitWord::kBits; 888 } 889 IndexToAddress(uint32_t idx)890 static Address IndexToAddress(uint32_t idx) { 891 Address a; 892 a.block_idx = idx / Block::kBits; 893 894 uint16_t bit_idx_inside_block = idx % Block::kBits; 895 a.block_offset.word_idx = bit_idx_inside_block / BitWord::kBits; 896 a.block_offset.bit_idx = bit_idx_inside_block % BitWord::kBits; 897 return a; 898 } 899 AddressToIndex(Address addr)900 static uint32_t AddressToIndex(Address addr) { 901 return addr.block_idx * Block::kBits + 902 addr.block_offset.word_idx * BitWord::kBits + 903 addr.block_offset.bit_idx; 904 } 905 906 // Returns number of blocks (arrays of 8 int64_t) required to store 907 // |bit_count| bits. 908 // 909 // This is useful to be able to find indices where "fast" algorithms can 910 // start which work on entire blocks. BlockCount(uint32_t bit_count)911 static constexpr uint32_t BlockCount(uint32_t bit_count) { 912 // Adding |Block::kBits - 1| gives us a quick way to get the count. We 913 // do this instead of adding 1 at the end because that gives incorrect 914 // answers for bit_count % Block::kBits == 0. 915 return (bit_count + Block::kBits - 1) / Block::kBits; 916 } 917 918 // Returns the index of the block which would store |idx|. BlockFloor(uint32_t idx)919 static constexpr uint32_t BlockFloor(uint32_t idx) { 920 return idx / Block::kBits; 921 } 922 923 // Converts a block index to a index in the BitVector. BlockToIndex(uint32_t block_idx)924 static constexpr uint32_t BlockToIndex(uint32_t block_idx) { 925 return block_idx * Block::kBits; 926 } 927 928 // Updates the counts in |counts| by counting the set bits in |words|. UpdateCounts(const std::vector<uint64_t> & words,std::vector<uint32_t> & counts)929 static void UpdateCounts(const std::vector<uint64_t>& words, 930 std::vector<uint32_t>& counts) { 931 PERFETTO_CHECK(words.size() == counts.size() * Block::kWords); 932 for (uint32_t i = 1; i < counts.size(); ++i) { 933 counts[i] = counts[i - 1] + 934 ConstBlock(&words[Block::kWords * (i - 1)]).CountSetBits(); 935 } 936 } 937 938 uint32_t size_ = 0; 939 // See class documentation for how these constants are chosen. 940 static constexpr uint16_t kWordsInBlock = Block::kWords; 941 static constexpr uint32_t kBitsInBlock = kWordsInBlock * BitWord::kBits; 942 std::vector<uint32_t> counts_; 943 std::vector<uint64_t> words_; 944 }; 945 946 } // namespace trace_processor 947 } // namespace perfetto 948 949 #endif // SRC_TRACE_PROCESSOR_CONTAINERS_BIT_VECTOR_H_ 950