xref: /aosp_15_r20/external/icing/icing/store/corpus-associated-scoring-data.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2020 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #ifndef ICING_STORE_TYPE_NAMESPACE_ASSOCIATED_SCORING_DATA_H_
16 #define ICING_STORE_TYPE_NAMESPACE_ASSOCIATED_SCORING_DATA_H_
17 
18 #include <cstdint>
19 #include <limits>
20 #include <type_traits>
21 
22 #include "icing/legacy/core/icing-packed-pod.h"
23 
24 namespace icing {
25 namespace lib {
26 
// Cache entity holding corpus-associated scoring statistics. The ground-truth
// data is stored somewhere else. The cache includes:
// 1. Number of documents contained in the corpus.
//    Positive values are required.
// 2. The sum of the documents' lengths, in number of tokens.
//
// Both values are stored as `int`. Every mutation saturates at
// std::numeric_limits<int>::max() instead of wrapping, so the unsigned
// accessors never observe a value produced by an overflow.
//
// The class is declared packed and holds exactly two `int` members; do not add
// members or change their types without revisiting the size assertion that
// accompanies this class.
class CorpusAssociatedScoreData {
 public:
  // Creates an entry for a corpus with `num_docs` documents whose lengths add
  // up to `sum_length_in_tokens` tokens. A `sum_length_in_tokens` outside the
  // range of `int` is clamped to the nearest representable value rather than
  // being truncated.
  explicit CorpusAssociatedScoreData(int num_docs = 0,
                                     int64_t sum_length_in_tokens = 0)
      : sum_length_in_tokens_(ClampSigned(sum_length_in_tokens)),
        num_docs_(num_docs) {}

  // Two entries are equal when both the document count and the token sum
  // match.
  bool operator==(const CorpusAssociatedScoreData& other) const {
    return num_docs_ == other.num_docs_ &&
           sum_length_in_tokens_ == other.sum_length_in_tokens_;
  }

  // Number of documents in the corpus.
  uint32_t num_docs() const { return num_docs_; }
  // Sets the number of documents; values above INT_MAX are stored as INT_MAX.
  void set_num_docs(uint32_t val) { num_docs_ = SaturateUnsigned(val); }

  // Sum of the lengths (in tokens) of all documents in the corpus.
  uint64_t sum_length_in_tokens() const { return sum_length_in_tokens_; }
  // Sets the token sum; values above INT_MAX are stored as INT_MAX.
  void set_sum_length_in_tokens(uint64_t val) {
    sum_length_in_tokens_ = SaturateUnsigned(val);
  }

  // Returns sum_length_in_tokens / (num_docs + 1).
  // The denominator is never zero for a non-negative document count, so an
  // empty corpus yields 0 instead of dividing by zero.
  // NOTE(review): the "+ 1" looks like deliberate smoothing rather than an
  // off-by-one; confirm against the scorer that consumes this value.
  float average_doc_length_in_tokens() const {
    return sum_length_in_tokens_ / (1.0f + num_docs_);
  }

  // Adds a new document.
  // Increments the document count and adds the document's length to the total
  // length of the corpus, sum_length_in_tokens_. Both updates saturate at
  // INT_MAX.
  void AddDocument(uint32_t doc_length_in_tokens) {
    if (num_docs_ < kMaxValue) {
      ++num_docs_;
    }
    // Do the addition in 64 bits: mixing `int` with `uint32_t` would promote
    // to unsigned arithmetic and wrap for large document lengths.
    const int64_t widened_sum =
        static_cast<int64_t>(sum_length_in_tokens_) + doc_length_in_tokens;
    sum_length_in_tokens_ = ClampSigned(widened_sum);
  }

 private:
  // Largest value either member can hold.
  static constexpr int kMaxValue = std::numeric_limits<int>::max();

  // Clamps a signed 64-bit value into the range of `int`.
  static constexpr int ClampSigned(int64_t value) {
    return value > static_cast<int64_t>(kMaxValue)
               ? kMaxValue
               : (value < static_cast<int64_t>(std::numeric_limits<int>::min())
                      ? std::numeric_limits<int>::min()
                      : static_cast<int>(value));
  }

  // Clamps an unsigned 64-bit value into [0, INT_MAX].
  static constexpr int SaturateUnsigned(uint64_t value) {
    return value > static_cast<uint64_t>(kMaxValue) ? kMaxValue
                                                    : static_cast<int>(value);
  }

  // The sum total of the length of all documents in the corpus.
  int sum_length_in_tokens_;
  // The number of documents in the corpus.
  int num_docs_;
} __attribute__((packed));
70 
71 static_assert(sizeof(CorpusAssociatedScoreData) == 8,
72               "Size of CorpusAssociatedScoreData should be 8");
73 static_assert(icing_is_packed_pod<CorpusAssociatedScoreData>::value,
74               "go/icing-ubsan");
75 
76 }  // namespace lib
77 }  // namespace icing
78 
79 #endif  // ICING_STORE_TYPE_NAMESPACE_ASSOCIATED_SCORING_DATA_H_
80