xref: /aosp_15_r20/external/icing/icing/file/posting_list/posting-list-identifier.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_FILE_POSTING_LIST_POSTING_LIST_IDENTIFIER_H_
16 #define ICING_FILE_POSTING_LIST_POSTING_LIST_IDENTIFIER_H_
17 
18 #include <cstdint>
19 
20 #include "icing/file/posting_list/posting-list-common.h"
21 #include "icing/legacy/index/icing-bit-util.h"
22 
23 namespace icing {
24 namespace lib {
25 
26 // 1M blocks * 4K page size = 4GB index
27 inline constexpr int kBlockIndexBits = 20;
28 inline constexpr int kMaxBlockIndex = (1u << kBlockIndexBits) - 1;
29 
30 // Class used to store information necessary to identify any posting list within
31 // the index.
32 //
33 // The 20 leftmost bits in this identifier encode the block index. The 12
34 // rightmost bits encode both the posting list index and the maximum number of
35 // bits required to encode a posting list index on that block.
36 //
37 // Ex. An index block containing a max of 68 posting lists each of size 60
38 // bytes (and thus 7 posting list bits), with a block index of 13 and a posting
39 // list index of 5.
40 //   0000  0000  0000  0000  1101  1111  0000  0101
41 //  |__________block-index_______|__pad__|_pl-index_|
42 //
43 // "pad" is some region starting at kEncodedPostingListIndexBits (12) bit and
44 // continuing rightward until reaching a terminating "0". This padding encodes
45 // the posting list bits value - posting list bits value is the number of bits
46 // after the terminating '0' of the "pad" region.
47 //
48 // This value will eventually be stored in the Main Lexicon.
49 class PostingListIdentifier {
50   // 1 bit is wasted to encode max pl index bits so there can be at most 2^11
51   // posting lists per block. Block size would have to be >=40020 bytes for
52   // there to be more than 2K+ posting lists in a block.
53   static constexpr int kEncodedPostingListIndexBits = 12;
54   static_assert(kEncodedPostingListIndexBits + kBlockIndexBits <=
55                     8 * sizeof(uint32_t),
56                 "Not enough room in PostingListIdentifier value to encode "
57                 "block index and posting list index.");
58 
59  public:
60   static PostingListIdentifier kInvalid;
61 
PostingListIdentifier()62   explicit PostingListIdentifier() { *this = kInvalid; }
63 
64   // 1. block_index - the index of this block within the FlashIndexStorage file
65   // 2. posting_list_index - the index of this posting list within the block
66   // 3. posting_list_index_bits - the number of bits needed to encode the
67   //    largest posting_list_index that this block can have.
PostingListIdentifier(uint32_t block_index,PostingListIndex posting_list_index,int posting_list_index_bits)68   explicit PostingListIdentifier(uint32_t block_index,
69                                  PostingListIndex posting_list_index,
70                                  int posting_list_index_bits) {
71     val_ = 0;
72     BITFIELD_OR(val_, /*offset=*/0, /*len=*/posting_list_index_bits,
73                 /*val=*/static_cast<uint64_t>(posting_list_index));
74     BITFIELD_OR(
75         val_, /*offset=*/posting_list_index_bits + 1,
76         /*len=*/kEncodedPostingListIndexBits - posting_list_index_bits - 1,
77         /*val=*/~0u);
78     BITFIELD_OR(val_, /*offset=*/kEncodedPostingListIndexBits,
79                 /*len=*/kBlockIndexBits,
80                 /*val=*/block_index);
81   }
82 
block_index()83   uint32_t block_index() const {
84     return BITFIELD_GET(val_, kEncodedPostingListIndexBits, kBlockIndexBits);
85   }
86 
posting_list_index()87   PostingListIndex posting_list_index() const {
88     return BITFIELD_GET(val_, 0, posting_list_index_bits());
89   }
90 
91   // Returns the maximum number of bits that a posting list index on the block
92   // referred to by block_index could use.
posting_list_index_bits()93   int posting_list_index_bits() const {
94     for (int bits = kEncodedPostingListIndexBits - 1; bits >= 0; --bits) {
95       if (((1u << bits) & val_) == 0) {
96         // Got to the zero bit. This is the start of pl index.
97         return bits;
98       }
99     }
100     return -1;
101   }
102 
is_valid()103   bool is_valid() const { return *this != kInvalid; }
104 
105   bool operator==(const PostingListIdentifier& rhs) const {
106     return val_ == rhs.val_;
107   }
108   bool operator!=(const PostingListIdentifier& rhs) const {
109     return !(*this == rhs);
110   }
111 
112  private:
113   uint32_t val_;
114 } __attribute__((packed));
115 static_assert(sizeof(PostingListIdentifier) == 4, "");
116 
117 }  //  namespace lib
118 }  //  namespace icing
119 
120 #endif  // ICING_FILE_POSTING_LIST_POSTING_LIST_IDENTIFIER_H_
121