1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #ifndef ICING_FILE_POSTING_LIST_POSTING_LIST_IDENTIFIER_H_ 16 #define ICING_FILE_POSTING_LIST_POSTING_LIST_IDENTIFIER_H_ 17 18 #include <cstdint> 19 20 #include "icing/file/posting_list/posting-list-common.h" 21 #include "icing/legacy/index/icing-bit-util.h" 22 23 namespace icing { 24 namespace lib { 25 26 // 1M blocks * 4K page size = 4GB index 27 inline constexpr int kBlockIndexBits = 20; 28 inline constexpr int kMaxBlockIndex = (1u << kBlockIndexBits) - 1; 29 30 // Class used to store information necessary to identify any posting list within 31 // the index. 32 // 33 // The 20 leftmost bits in this identifier encode the block index. The 12 34 // rightmost bits encode both the posting list index and the maximum number of 35 // bits required to encode a posting list index on that block. 36 // 37 // Ex. An index block containing a max of 68 posting lists each of size 60 38 // bytes (and thus 7 posting list bits), with a block index of 13 and a posting 39 // list index of 5. 40 // 0000 0000 0000 0000 1101 1111 0000 0101 41 // |__________block-index_______|__pad__|_pl-index_| 42 // 43 // "pad" is some region starting at kEncodedPostingListIndexBits (12) bit and 44 // continuing rightward until reaching a terminating "0". This padding encodes 45 // the posting list bits value - posting list bits value is the number of bits 46 // after the terminating '0' of the "pad" region. 47 // 48 // This value will eventually be stored in the Main Lexicon. 49 class PostingListIdentifier { 50 // 1 bit is wasted to encode max pl index bits so there can be at most 2^11 51 // posting lists per block. Block size would have to be >=40020 bytes for 52 // there to be more than 2K+ posting lists in a block. 53 static constexpr int kEncodedPostingListIndexBits = 12; 54 static_assert(kEncodedPostingListIndexBits + kBlockIndexBits <= 55 8 * sizeof(uint32_t), 56 "Not enough room in PostingListIdentifier value to encode " 57 "block index and posting list index."); 58 59 public: 60 static PostingListIdentifier kInvalid; 61 PostingListIdentifier()62 explicit PostingListIdentifier() { *this = kInvalid; } 63 64 // 1. block_index - the index of this block within the FlashIndexStorage file 65 // 2. posting_list_index - the index of this posting list within the block 66 // 3. posting_list_index_bits - the number of bits needed to encode the 67 // largest posting_list_index that this block can have. PostingListIdentifier(uint32_t block_index,PostingListIndex posting_list_index,int posting_list_index_bits)68 explicit PostingListIdentifier(uint32_t block_index, 69 PostingListIndex posting_list_index, 70 int posting_list_index_bits) { 71 val_ = 0; 72 BITFIELD_OR(val_, /*offset=*/0, /*len=*/posting_list_index_bits, 73 /*val=*/static_cast<uint64_t>(posting_list_index)); 74 BITFIELD_OR( 75 val_, /*offset=*/posting_list_index_bits + 1, 76 /*len=*/kEncodedPostingListIndexBits - posting_list_index_bits - 1, 77 /*val=*/~0u); 78 BITFIELD_OR(val_, /*offset=*/kEncodedPostingListIndexBits, 79 /*len=*/kBlockIndexBits, 80 /*val=*/block_index); 81 } 82 block_index()83 uint32_t block_index() const { 84 return BITFIELD_GET(val_, kEncodedPostingListIndexBits, kBlockIndexBits); 85 } 86 posting_list_index()87 PostingListIndex posting_list_index() const { 88 return BITFIELD_GET(val_, 0, posting_list_index_bits()); 89 } 90 91 // Returns the maximum number of bits that a posting list index on the block 92 // referred to by block_index could use. posting_list_index_bits()93 int posting_list_index_bits() const { 94 for (int bits = kEncodedPostingListIndexBits - 1; bits >= 0; --bits) { 95 if (((1u << bits) & val_) == 0) { 96 // Got to the zero bit. This is the start of pl index. 97 return bits; 98 } 99 } 100 return -1; 101 } 102 is_valid()103 bool is_valid() const { return *this != kInvalid; } 104 105 bool operator==(const PostingListIdentifier& rhs) const { 106 return val_ == rhs.val_; 107 } 108 bool operator!=(const PostingListIdentifier& rhs) const { 109 return !(*this == rhs); 110 } 111 112 private: 113 uint32_t val_; 114 } __attribute__((packed)); 115 static_assert(sizeof(PostingListIdentifier) == 4, ""); 116 117 } // namespace lib 118 } // namespace icing 119 120 #endif // ICING_FILE_POSTING_LIST_POSTING_LIST_IDENTIFIER_H_ 121