1*8b6cd535SAndroid Build Coastguard Worker // Copyright (C) 2019 Google LLC
2*8b6cd535SAndroid Build Coastguard Worker //
3*8b6cd535SAndroid Build Coastguard Worker // Licensed under the Apache License, Version 2.0 (the "License");
4*8b6cd535SAndroid Build Coastguard Worker // you may not use this file except in compliance with the License.
5*8b6cd535SAndroid Build Coastguard Worker // You may obtain a copy of the License at
6*8b6cd535SAndroid Build Coastguard Worker //
7*8b6cd535SAndroid Build Coastguard Worker // http://www.apache.org/licenses/LICENSE-2.0
8*8b6cd535SAndroid Build Coastguard Worker //
9*8b6cd535SAndroid Build Coastguard Worker // Unless required by applicable law or agreed to in writing, software
10*8b6cd535SAndroid Build Coastguard Worker // distributed under the License is distributed on an "AS IS" BASIS,
11*8b6cd535SAndroid Build Coastguard Worker // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12*8b6cd535SAndroid Build Coastguard Worker // See the License for the specific language governing permissions and
13*8b6cd535SAndroid Build Coastguard Worker // limitations under the License.
14*8b6cd535SAndroid Build Coastguard Worker
15*8b6cd535SAndroid Build Coastguard Worker #include "icing/index/lite/lite-index-options.h"
16*8b6cd535SAndroid Build Coastguard Worker
17*8b6cd535SAndroid Build Coastguard Worker #include <algorithm>
18*8b6cd535SAndroid Build Coastguard Worker #include <cstddef>
19*8b6cd535SAndroid Build Coastguard Worker #include <cstdint>
20*8b6cd535SAndroid Build Coastguard Worker #include <string>
21*8b6cd535SAndroid Build Coastguard Worker
22*8b6cd535SAndroid Build Coastguard Worker #include "icing/index/lite/term-id-hit-pair.h"
23*8b6cd535SAndroid Build Coastguard Worker #include "icing/legacy/index/icing-dynamic-trie.h"
24*8b6cd535SAndroid Build Coastguard Worker
25*8b6cd535SAndroid Build Coastguard Worker namespace icing {
26*8b6cd535SAndroid Build Coastguard Worker namespace lib {
27*8b6cd535SAndroid Build Coastguard Worker
28*8b6cd535SAndroid Build Coastguard Worker namespace {
29*8b6cd535SAndroid Build Coastguard Worker
// Upper bound on the number of variants indexed for a single token.
constexpr int kIcingMaxVariantsPerToken = 10;

// Original maximum (searchable) document size: 65535 bytes (64Ki - 1).
// Documents exceeding this are clipped.
constexpr size_t kIcingMaxSearchableDocumentSize = (1u << 16) - 1;

// Max num tokens per document, derived from the searchable size limit above.
// NOTE(review): the divisor 5 presumably approximates bytes per token --
// confirm.
constexpr uint32_t kIcingMaxNumTokensPerDoc =
    static_cast<uint32_t>(kIcingMaxSearchableDocumentSize / 5);

// Max num hits a single document can produce: every token contributing the
// maximum number of variants.
constexpr uint32_t kIcingMaxNumHitsPerDocument =
    kIcingMaxNumTokensPerDoc * kIcingMaxVariantsPerToken;
39*8b6cd535SAndroid Build Coastguard Worker
CalculateHitBufferSize(uint32_t hit_buffer_want_merge_bytes)40*8b6cd535SAndroid Build Coastguard Worker uint32_t CalculateHitBufferSize(uint32_t hit_buffer_want_merge_bytes) {
41*8b6cd535SAndroid Build Coastguard Worker constexpr uint32_t kHitBufferSlopMult = 2;
42*8b6cd535SAndroid Build Coastguard Worker
43*8b6cd535SAndroid Build Coastguard Worker // Add a 2x slop for the hit buffer. We need to make sure we can at
44*8b6cd535SAndroid Build Coastguard Worker // least fit one document with index variants.
45*8b6cd535SAndroid Build Coastguard Worker // TODO(b/111690435) Move LiteIndex::Element to a separate file so that this
46*8b6cd535SAndroid Build Coastguard Worker // can use sizeof(LiteIndex::Element)
47*8b6cd535SAndroid Build Coastguard Worker uint32_t hit_capacity_elts_with_slop =
48*8b6cd535SAndroid Build Coastguard Worker hit_buffer_want_merge_bytes / sizeof(TermIdHitPair);
49*8b6cd535SAndroid Build Coastguard Worker // Add some slop for index variants on top of max num tokens.
50*8b6cd535SAndroid Build Coastguard Worker hit_capacity_elts_with_slop += kIcingMaxNumHitsPerDocument;
51*8b6cd535SAndroid Build Coastguard Worker hit_capacity_elts_with_slop *= kHitBufferSlopMult;
52*8b6cd535SAndroid Build Coastguard Worker
53*8b6cd535SAndroid Build Coastguard Worker return hit_capacity_elts_with_slop;
54*8b6cd535SAndroid Build Coastguard Worker }
55*8b6cd535SAndroid Build Coastguard Worker
CalculateTrieOptions(uint32_t hit_buffer_size)56*8b6cd535SAndroid Build Coastguard Worker IcingDynamicTrie::Options CalculateTrieOptions(uint32_t hit_buffer_size) {
57*8b6cd535SAndroid Build Coastguard Worker // The default min is 1/5th of the main index lexicon, which can
58*8b6cd535SAndroid Build Coastguard Worker // hold >1M terms. We don't need values so value size is 0. We
59*8b6cd535SAndroid Build Coastguard Worker // conservatively scale from there.
60*8b6cd535SAndroid Build Coastguard Worker //
61*8b6cd535SAndroid Build Coastguard Worker // We can give this a lot of headroom because overestimating the
62*8b6cd535SAndroid Build Coastguard Worker // requirement has minimal resource impact.
63*8b6cd535SAndroid Build Coastguard Worker double scaling_factor =
64*8b6cd535SAndroid Build Coastguard Worker std::max(1.0, static_cast<double>(hit_buffer_size) / (100u << 10));
65*8b6cd535SAndroid Build Coastguard Worker return IcingDynamicTrie::Options((200u << 10) * scaling_factor,
66*8b6cd535SAndroid Build Coastguard Worker (200u << 10) * scaling_factor,
67*8b6cd535SAndroid Build Coastguard Worker (1u << 20) * scaling_factor, 0);
68*8b6cd535SAndroid Build Coastguard Worker }
69*8b6cd535SAndroid Build Coastguard Worker
70*8b6cd535SAndroid Build Coastguard Worker } // namespace
71*8b6cd535SAndroid Build Coastguard Worker
LiteIndexOptions(const std::string & filename_base,uint32_t hit_buffer_want_merge_bytes,bool hit_buffer_sort_at_indexing,uint32_t hit_buffer_sort_threshold_bytes)72*8b6cd535SAndroid Build Coastguard Worker LiteIndexOptions::LiteIndexOptions(const std::string& filename_base,
73*8b6cd535SAndroid Build Coastguard Worker uint32_t hit_buffer_want_merge_bytes,
74*8b6cd535SAndroid Build Coastguard Worker bool hit_buffer_sort_at_indexing,
75*8b6cd535SAndroid Build Coastguard Worker uint32_t hit_buffer_sort_threshold_bytes)
76*8b6cd535SAndroid Build Coastguard Worker : filename_base(filename_base),
77*8b6cd535SAndroid Build Coastguard Worker hit_buffer_want_merge_bytes(hit_buffer_want_merge_bytes),
78*8b6cd535SAndroid Build Coastguard Worker hit_buffer_sort_at_indexing(hit_buffer_sort_at_indexing),
79*8b6cd535SAndroid Build Coastguard Worker hit_buffer_sort_threshold_bytes(hit_buffer_sort_threshold_bytes) {
80*8b6cd535SAndroid Build Coastguard Worker hit_buffer_size = CalculateHitBufferSize(hit_buffer_want_merge_bytes);
81*8b6cd535SAndroid Build Coastguard Worker lexicon_options = CalculateTrieOptions(hit_buffer_size);
82*8b6cd535SAndroid Build Coastguard Worker display_mappings_options = CalculateTrieOptions(hit_buffer_size);
83*8b6cd535SAndroid Build Coastguard Worker }
84*8b6cd535SAndroid Build Coastguard Worker
85*8b6cd535SAndroid Build Coastguard Worker } // namespace lib
86*8b6cd535SAndroid Build Coastguard Worker } // namespace icing
87