xref: /aosp_15_r20/external/icing/icing/index/hit/hit.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/index/hit/hit.h"
16 
17 #include <cstring>
18 #include <limits>
19 
20 #include "icing/schema/section.h"
21 #include "icing/store/document-id.h"
22 #include "icing/util/bit-util.h"
23 #include "icing/util/logging.h"
24 
25 namespace icing {
26 namespace lib {
27 
28 namespace {
29 
InvertDocumentId(DocumentId document_id)30 inline DocumentId InvertDocumentId(DocumentId document_id) {
31   static_assert(kMaxDocumentId <= (std::numeric_limits<DocumentId>::max() - 1),
32                 "(kMaxDocumentId + 1) must not overflow.");
33   static_assert(
34       (kMaxDocumentId + 1) < (1U << kDocumentIdBits),
35       "(kMaxDocumentId + 1) must also fit in kDocumentIdBits wide bitfield");
36   // Invert the document_id value. +1 is added so the resulting range is [1,
37   // kMaxDocumentId + 1].
38   return (kMaxDocumentId + 1) - document_id;
39 }
40 
41 }  // namespace
42 
BasicHit(SectionId section_id,DocumentId document_id)43 BasicHit::BasicHit(SectionId section_id, DocumentId document_id) {
44   // Values are stored so that when sorted, they appear in document_id
45   // descending, section_id ascending, order. So inverted document_id appears in
46   // the most significant bits, followed by (uninverted) section_id.
47   Value temp_value = 0;
48   bit_util::BitfieldSet(/*new_value=*/InvertDocumentId(document_id),
49                         /*lsb_offset=*/kSectionIdBits, /*len=*/kDocumentIdBits,
50                         /*value_out=*/&temp_value);
51   bit_util::BitfieldSet(/*new_value=*/section_id, /*lsb_offset=*/0,
52                         /*len=*/kSectionIdBits, /*value_out=*/&temp_value);
53   value_ = temp_value;
54 }
55 
document_id() const56 DocumentId BasicHit::document_id() const {
57   DocumentId inverted_document_id = bit_util::BitfieldGet(
58       value_, /*lsb_offset=*/kSectionIdBits, /*len=*/kDocumentIdBits);
59   // Undo the document_id inversion.
60   return InvertDocumentId(inverted_document_id);
61 }
62 
section_id() const63 SectionId BasicHit::section_id() const {
64   return bit_util::BitfieldGet(value_, /*lsb_offset=*/0,
65                                /*len=*/kSectionIdBits);
66 }
67 
Hit(Value value,Flags flags,TermFrequency term_frequency)68 Hit::Hit(Value value, Flags flags, TermFrequency term_frequency)
69     : flags_(flags), term_frequency_(term_frequency) {
70   memcpy(value_.data(), &value, sizeof(value));
71   if (!CheckFlagsAreConsistent()) {
72     ICING_VLOG(1)
73         << "Creating Hit that has inconsistent flag values across its fields: "
74         << "Hit(value=" << value << ", flags=" << flags
75         << "term_frequency=" << term_frequency << ")";
76   }
77 }
78 
Hit(SectionId section_id,DocumentId document_id,Hit::TermFrequency term_frequency,bool is_in_prefix_section,bool is_prefix_hit,bool is_stemmed_hit)79 Hit::Hit(SectionId section_id, DocumentId document_id,
80          Hit::TermFrequency term_frequency, bool is_in_prefix_section,
81          bool is_prefix_hit, bool is_stemmed_hit)
82     : term_frequency_(term_frequency) {
83   // We compute flags first as the value's has_flags bit depends on the flags_
84   // field.
85   Flags temp_flags = 0;
86   bit_util::BitfieldSet(term_frequency != kDefaultTermFrequency,
87                         kHasTermFrequency, /*len=*/1, &temp_flags);
88   bit_util::BitfieldSet(is_stemmed_hit, kIsStemmedHit, /*len=*/1, &temp_flags);
89   flags_ = temp_flags;
90 
91   // Values are stored so that when sorted, they appear in document_id
92   // descending, section_id ascending, order. Also, all else being
93   // equal, non-prefix hits sort before prefix hits. So inverted
94   // document_id appears in the most significant bits, followed by
95   // (uninverted) section_id.
96   Value temp_value = 0;
97   bit_util::BitfieldSet(InvertDocumentId(document_id),
98                         kSectionIdBits + kNumFlagsInValueField, kDocumentIdBits,
99                         &temp_value);
100   bit_util::BitfieldSet(section_id, kNumFlagsInValueField, kSectionIdBits,
101                         &temp_value);
102   bit_util::BitfieldSet(is_prefix_hit, kPrefixHit, /*len=*/1, &temp_value);
103   bit_util::BitfieldSet(is_in_prefix_section, kInPrefixSection,
104                         /*len=*/1, &temp_value);
105 
106   bool has_flags = flags_ != kNoEnabledFlags;
107   bit_util::BitfieldSet(has_flags, kHasFlags, /*len=*/1, &temp_value);
108 
109   memcpy(value_.data(), &temp_value, sizeof(temp_value));
110 }
111 
document_id() const112 DocumentId Hit::document_id() const {
113   DocumentId inverted_document_id = bit_util::BitfieldGet(
114       value(), kSectionIdBits + kNumFlagsInValueField, kDocumentIdBits);
115   // Undo the document_id inversion.
116   return InvertDocumentId(inverted_document_id);
117 }
118 
section_id() const119 SectionId Hit::section_id() const {
120   return bit_util::BitfieldGet(value(), kNumFlagsInValueField, kSectionIdBits);
121 }
122 
is_prefix_hit() const123 bool Hit::is_prefix_hit() const {
124   return bit_util::BitfieldGet(value(), kPrefixHit, 1);
125 }
126 
is_in_prefix_section() const127 bool Hit::is_in_prefix_section() const {
128   return bit_util::BitfieldGet(value(), kInPrefixSection, 1);
129 }
130 
has_flags() const131 bool Hit::has_flags() const {
132   return bit_util::BitfieldGet(value(), kHasFlags, 1);
133 }
134 
has_term_frequency() const135 bool Hit::has_term_frequency() const {
136   return bit_util::BitfieldGet(flags(), kHasTermFrequency, 1);
137 }
138 
is_stemmed_hit() const139 bool Hit::is_stemmed_hit() const {
140   return bit_util::BitfieldGet(flags(), kIsStemmedHit, 1);
141 }
142 
CheckFlagsAreConsistent() const143 bool Hit::CheckFlagsAreConsistent() const {
144   bool has_flags = flags_ != kNoEnabledFlags;
145   bool has_flags_enabled_in_value =
146       bit_util::BitfieldGet(value(), kHasFlags, /*len=*/1);
147 
148   bool has_term_frequency = term_frequency_ != kDefaultTermFrequency;
149   bool has_term_frequency_enabled_in_flags =
150       bit_util::BitfieldGet(flags(), kHasTermFrequency, /*len=*/1);
151 
152   return has_flags == has_flags_enabled_in_value &&
153          has_term_frequency == has_term_frequency_enabled_in_flags;
154 }
155 
TranslateHit(Hit old_hit,DocumentId new_document_id)156 Hit Hit::TranslateHit(Hit old_hit, DocumentId new_document_id) {
157   return Hit(old_hit.section_id(), new_document_id, old_hit.term_frequency(),
158              old_hit.is_in_prefix_section(), old_hit.is_prefix_hit(),
159              old_hit.is_stemmed_hit());
160 }
161 
162 }  // namespace lib
163 }  // namespace icing
164