xref: /aosp_15_r20/external/icing/icing/legacy/index/icing-dynamic-trie.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 //
15 // We store the trie in three areas: nodes, nexts and suffixes.
16 //
17 // Nodes contain an index to a children array (kept in nexts) or to
18 // suffixes (for leaf nodes). Nexts contain children arrays of
19 // different sizes. Each child entry has the matched char and an index
20 // back into the nodes. Leaf nodes index into suffixes instead of the
21 // nexts array. Each suffix is a NULL-terminated suffix off the trie,
22 // followed by a 4-byte value associated with that key.
23 //
24 // Allocation
25 //
26 // Nodes are allocated and never removed. Nexts contain arrays of
27 // sizes in power-of-2 increments, i.e. 1, 2, 4, ..., 256. When the
28 // number of children of a node increases, it is relocated to an array
29 // with the proper size. The (smaller) unused array is added to a free
30 // list. A free list is kept for each array size. Allocations happen
31 // from the free list first, and then from the end of the nexts
32 // array. Suffixes are never freed or compacted. If a node wants to
33 // refer to a smaller suffix, it moves the pointer forward and the
34 // characters before the new pointer are wasted.
35 //
36 // Keys can contain any character except '\0'. The '\0' char is
37 // special in that it specifies an end-of-key in the child array.
38 //
39 // Ideas to try:
40 //
41 // - Put suffix index in a Next instead of creating a leaf node.
42 // - Change allocation buckets to 1, 2, 3, 4, 5, 6, 7, 8, 16, 32, ..., 256
43 // - Compact next array
44 // - GroupVarByte and delta-encode the next array
45 // - Collapse nodes with single children
46 //
47 // Persistence
48 //
49 // We persist the trie in a binary format such that resurrecting the
50 // trie is simply a few file reads. The file is laid out as such:
51 //
52 // - Header
53 // - Nodes
54 // - Nexts
55 // - Suffixes
56 //
57 // Each section is aligned to IcingMMapper::system_page_size(). The max
58 // requested value for each array is pre-allocated in the file. When
59 // we make modifications to the arrays, we set bits in a dirty bitmap
60 // of pages. No changes get written to disk until an explicit call to
61 // Flush. Then we only write the pages that have their dirty bit set.
62 
63 #include "icing/legacy/index/icing-dynamic-trie.h"
64 
65 #include <fcntl.h>
66 #include <sys/mman.h>
67 #include <sys/stat.h>
68 #include <unistd.h>
69 
70 #include <algorithm>
71 #include <cerrno>
72 #include <cinttypes>
73 #include <cstdint>
74 #include <cstring>
75 #include <memory>
76 #include <ostream>
77 #include <string>
78 #include <string_view>
79 #include <utility>
80 #include <vector>
81 
82 #include "icing/text_classifier/lib3/utils/base/status.h"
83 #include "icing/absl_ports/canonical_errors.h"
84 #include "icing/legacy/core/icing-string-util.h"
85 #include "icing/legacy/core/icing-timer.h"
86 #include "icing/legacy/index/icing-array-storage.h"
87 #include "icing/legacy/index/icing-filesystem.h"
88 #include "icing/legacy/index/icing-flash-bitmap.h"
89 #include "icing/legacy/index/icing-mmapper.h"
90 #include "icing/legacy/index/proto/icing-dynamic-trie-header.pb.h"
91 #include "icing/util/crc32.h"
92 #include "icing/util/i18n-utils.h"
93 #include "icing/util/logging.h"
94 #include "icing/util/math-util.h"
95 #include "icing/util/status-macros.h"
96 
97 namespace icing {
98 namespace lib {
99 
100 namespace {
101 
102 constexpr uint32_t kInvalidNodeIndex = (1U << 24) - 1;
103 constexpr uint32_t kInvalidNextIndex = ~0U;
104 
ResetMutableNext(IcingDynamicTrie::Next & mutable_next)105 void ResetMutableNext(IcingDynamicTrie::Next &mutable_next) {
106   mutable_next.set_val(0xff);
107   mutable_next.set_node_index(kInvalidNodeIndex);
108 }
109 
110 // Helper function to check that there is no termination character '\0' in the
111 // key.
IsKeyValid(std::string_view key)112 bool IsKeyValid(std::string_view key) {
113   return key.find('\0') == std::string_view::npos;  // NOLINT
114 }
115 
GetCharOrNull(std::string_view s,int pos)116 char GetCharOrNull(std::string_view s, int pos) {
117   return (pos < s.size()) ? s[pos] : '\0';
118 }
119 
120 }  // namespace
121 
122 // Based on the bit field widths.
123 const uint32_t IcingDynamicTrie::Options::kMaxNodes = (1U << 24) - 1;
124 const uint32_t IcingDynamicTrie::Options::kMaxNexts = (1U << 27) - 1;
125 const uint32_t IcingDynamicTrie::Options::kMaxSuffixesSize = 1U << 27;
126 const uint32_t IcingDynamicTrie::Options::kMaxValueSize = 1U << 16;
127 
128 const uint32_t IcingDynamicTrie::kInvalidSuffixIndex = ~0U;
129 
130 const int IcingDynamicTrie::kMaxNextArraySize;
131 const int IcingDynamicTrie::kNumNextAllocationBuckets;
132 
133 const uint32_t IcingDynamicTrie::kMaxPropertyId;
134 
135 const uint32_t IcingDynamicTrie::kInvalidValueIndex;
136 
137 const uint32_t IcingDynamicTrie::kNoCrc;
138 
139 // Manages logical node candidates while searching for possible
140 // variant matches. Currently implemented as depth first search. The
141 // max stack depth is key length * variant fanout. Since max variant
142 // fanout is 3, we don't need to worry about blowup of the depth first
143 // search stack.
144 //
145 // Keeps track of original matched string (the string actually present
146 // in the trie) for every candidate.
147 class IcingDynamicTrie::CandidateSet {
148  public:
149   struct Candidate {
150     LogicalNode logical_node;
151     const char *key;
152     int matched_prefix_len;
153     std::string matched_span;
154 
Candidateicing::lib::IcingDynamicTrie::CandidateSet::Candidate155     Candidate() {}
156 
Candidateicing::lib::IcingDynamicTrie::CandidateSet::Candidate157     Candidate(const LogicalNode &logical_node_in, const char *key_in,
158               int matched_prefix_len_in, const char *matched_span_in,
159               int matched_span_len_in)
160         : logical_node(logical_node_in),
161           key(key_in),
162           matched_prefix_len(matched_prefix_len_in),
163           matched_span(matched_span_in, matched_span_len_in) {}
164 
matched_lenicing::lib::IcingDynamicTrie::CandidateSet::Candidate165     int matched_len() const { return matched_prefix_len + matched_span.size(); }
166   };
167 
CandidateSet(bool prefix)168   explicit CandidateSet(bool prefix) : prefix_(prefix) {}
169 
IsTerminal(const char * key,uint32_t value_index) const170   bool IsTerminal(const char *key, uint32_t value_index) const {
171     // Terminal match condition:
172     //
173     // 1. Key was entirely consumed.
174     // 2. The entire suffix was consumed (hence value index is
175     //    valid). OR, we are ok with prefix matches.
176     return *key == 0 && (value_index != kInvalidValueIndex || prefix_);
177   }
178 
179   // Push a terminal or non-terminal.
Push(const LogicalNode & logical_node,const char * key,uint32_t value_index,int matched_prefix_len,const char * matched_span,int matched_span_len)180   void Push(const LogicalNode &logical_node, const char *key,
181             uint32_t value_index, int matched_prefix_len,
182             const char *matched_span, int matched_span_len) {
183     if (!AddMatchIfTerminal(key, value_index, matched_span, matched_span_len)) {
184       PushNonTerminal(logical_node, key, matched_prefix_len, matched_span,
185                       matched_span_len);
186     }
187   }
188 
AddMatchIfTerminal(const char * key,uint32_t value_index,const char * matched_span,int matched_span_len)189   bool AddMatchIfTerminal(const char *key, uint32_t value_index,
190                           const char *matched_span, int matched_span_len) {
191     if (!IsTerminal(key, value_index)) {
192       return false;
193     }
194 
195     // Terminal match.
196     matches_.push_back(OriginalMatch());
197     OriginalMatch *match = &matches_.back();
198     match->value_index = value_index;
199     match->orig.reserve(cur_prefix_.size() + matched_span_len);
200     match->orig.append(cur_prefix_).append(matched_span, matched_span_len);
201     return true;
202   }
203 
204   // Push a definite non-terminal.
PushNonTerminal(const LogicalNode & logical_node,const char * key,int matched_prefix_len,const char * matched_span,int matched_span_len)205   void PushNonTerminal(const LogicalNode &logical_node, const char *key,
206                        int matched_prefix_len, const char *matched_span,
207                        int matched_span_len) {
208     candidates_.push_back(Candidate(logical_node, key, matched_prefix_len,
209                                     matched_span, matched_span_len));
210   }
211 
Pop(Candidate * candidate)212   void Pop(Candidate *candidate) {
213     *candidate = candidates_.back();
214     if (cur_prefix_.size() < candidate->matched_prefix_len) {
215       ICING_LOG(FATAL)
216           << "Length of current prefix is smaller than length of matched "
217              "prefer, there're inconsistencies in dynamic trie.";
218     }
219 
220     cur_prefix_.resize(candidate->matched_prefix_len);
221     cur_prefix_.append(candidate->matched_span);
222     candidates_.pop_back();
223   }
224 
empty() const225   bool empty() const { return candidates_.empty(); }
226 
Release(std::vector<OriginalMatch> * ret)227   void Release(std::vector<OriginalMatch> *ret) {
228     if (!empty()) {
229       ICING_LOG(FATAL) << "Candidate set not empty before releasing matches";
230     }
231 
232     ret->swap(matches_);
233 
234     cur_prefix_.clear();
235     candidates_.clear();
236     matches_.clear();
237   }
238 
239  private:
240   const bool prefix_;
241 
242   std::string cur_prefix_;
243   std::vector<Candidate> candidates_;
244 
245   std::vector<IcingDynamicTrie::OriginalMatch> matches_;
246 };
247 
248 // Options.
is_valid() const249 bool IcingDynamicTrie::Options::is_valid() const {
250   return max_nodes <= kMaxNodes && max_nodes > 0 && max_nexts <= kMaxNexts &&
251          max_nexts > 0 && max_suffixes_size <= kMaxSuffixesSize &&
252          max_suffixes_size > 0 && value_size <= kMaxValueSize;
253 }
254 
255 // IcingDynamicTrieStorage
256 class IcingDynamicTrie::IcingDynamicTrieStorage {
257  public:
258   IcingDynamicTrieStorage(const std::string &file_basename,
259                           const RuntimeOptions &runtime_options,
260                           const IcingFilesystem *filesystem);
261   ~IcingDynamicTrieStorage();
262 
is_initialized() const263   bool is_initialized() const { return hdr_mmapper_.is_valid(); }
264 
265   bool CreateIfNotExist(const Options &options);
266   bool Init();
267   static bool Remove(const std::string &file_basename,
268                      const IcingFilesystem &filesystem);
269   bool Sync();
270   uint64_t GetDiskUsage() const;
271 
272   // Returns the size of the elements held in the trie. This excludes the size
273   // of any internal metadata of the trie, e.g. the trie's header.
274   uint64_t GetElementsFileSize() const;
275 
276   void Warm();
277 
278   void Clear();
279 
empty() const280   bool empty() const { return hdr().num_nodes() == 0; }
281 
282   // Never cast off these consts when writing to the arrays. Always
283   // use the GetMutable* helpers above.
GetNode(uint32_t idx) const284   const Node *GetNode(uint32_t idx) const {
285     return &array_storage_[NODE].array_cast<Node>()[idx];
286   }
287 
288   // REQUIRES: !empty(). Otherwise node 0 could contain invalid data
289   //   (next_index, is_leaf, log2_num_children).
GetRootNode() const290   const Node *GetRootNode() const { return GetNode(0); }
291 
GetNext(uint32_t idx,int child) const292   const Next *GetNext(uint32_t idx, int child) const {
293     return &array_storage_[NEXT].array_cast<Next>()[idx + child];
294   }
GetSuffix(uint32_t idx) const295   const char *GetSuffix(uint32_t idx) const {
296     return &array_storage_[SUFFIX].array_cast<char>()[idx];
297   }
298 
GetNodeIndex(const Node * node) const299   uint32_t GetNodeIndex(const Node *node) const { return node - GetNode(0); }
GetNextArrayIndex(const Next * next) const300   uint32_t GetNextArrayIndex(const Next *next) const {
301     return next - GetNext(0, 0);
302   }
GetSuffixIndex(const char * suffix) const303   uint32_t GetSuffixIndex(const char *suffix) const {
304     return suffix - GetSuffix(0);
305   }
306 
307   // By default, nodes_, nexts_ and suffixes_ are read-only. This
308   // returns a writable element or array within and sets
309   // dirty_pages_[array_type] as a side effect, assuming the mutable
310   // area will get written to.
311   Node *GetMutableNode(uint32_t idx);
312   Next *GetMutableNextArray(uint32_t idx, uint32_t len);
313   char *GetMutableSuffix(uint32_t idx, uint32_t len);
314 
315   // Update crcs based on current contents. Returns all_crc or kNoCrc.
316   Crc32 UpdateCrc();
317 
318   // Calculates the current crc and returns it.
319   Crc32 GetCrc() const;
320 
321   // Allocators.
322   uint32_t nodes_left() const;
323   uint32_t nexts_left() const;
324   uint32_t suffixes_left() const;
325 
326   // REQUIRES: nodes_left() > 0.
327   Node *AllocNode();
328   // REQUIRES: nexts_left() >= kMaxNextArraySize.
329   libtextclassifier3::StatusOr<Next *> AllocNextArray(int size);
330   void FreeNextArray(Next *next, int log2_size);
331   // REQUIRES: suffixes_left() >= strlen(suffix) + 1 + value_size()
332   uint32_t MakeSuffix(std::string_view suffix, const void *value,
333                       uint32_t *value_index);
334 
hdr() const335   const IcingDynamicTrieHeader &hdr() const { return hdr_.hdr; }
336 
value_size() const337   uint32_t value_size() const { return hdr().value_size(); }
338 
339   void FillDirtyPageStats(Stats *stats) const;
340 
inc_num_keys()341   void inc_num_keys() { hdr_.hdr.set_num_keys(hdr_.hdr.num_keys() + 1); }
342 
dec_num_keys()343   void dec_num_keys() { hdr_.hdr.set_num_keys(hdr_.hdr.num_keys() - 1); }
344 
345  private:
346   friend void IcingDynamicTrie::SetHeader(
347       const IcingDynamicTrieHeader &new_hdr);
348 
349   enum ArrayType { NODE, NEXT, SUFFIX, NUM_ARRAY_TYPES };
350 
351   // Returns all filenames that are part of the storage. First
352   // filename is the header and the rest correspond to ArrayType enum
353   // values.
354   static void GetFilenames(const std::string &file_basename,
355                            std::vector<std::string> *filenames);
356   static std::string GetHeaderFilename(const std::string &file_basename);
357 
358   // TODO(b/353398502) Improve the handling of the header to avoid this weird
359   // pattern with both an mmapped header and an in-memory header.
360   // Calculates the crc of the header as it is represented in the
361   // header_mmapper_.
362   Crc32 GetWrittenHeaderCrc() const;
363 
364   // Calculates the crc of the hdr_.
365   //
366   // This value will deviate from GetWrittenHeaderCrc() if the header is
367   // modified after initialization or the last call to WriteHeader().
368   //
369   // NOTE: This function will need to serialize hdr_ to calculate the crc.
370   // Therefore, if given the choice, prefer to use GetWrittenHeaderCrc().
371   Crc32 GetHeaderCrc() const;
372 
373   // Returns the crc of the cached crcs.
374   Crc32 GetCachedCrc() const;
375 
376   Crc32 UpdateCrcInternal(bool write_hdr);
377 
378   // Initializes hdr_ with options and writes the resulting header to disk.
379   bool CreateNewHeader(IcingScopedFd sfd, const Options &options);
380   bool WriteHeader();
381 
382   // Header block. On-disk header block format is as follows:
383   //
384   // |serialized-header|pad|crcs|
385   // <--- system_page_size() --->
386 
387   // Wrapper for header protobuf.
388   class Header {
389     // Serialized format:
390     //
391     // magic(4)|size(4)|serialized hdr(size)
392     static const uint32_t kMagic;
393     // TODO(b/77482303) : Remove version from the IcingFlashBitmap header -
394     // magic makes it unnecessary.
395     static const uint32_t kCurVersion;
396 
397    public:
398     void Init(const Options &options);
399     bool Init(const uint8_t *buf, uint32_t buf_size);
Invalidate()400     void Invalidate() { hdr.Clear(); }
401     bool SerializeToArray(uint8_t *buf, uint32_t buf_size) const;
402     bool Verify();
403 
404     IcingDynamicTrieHeader hdr;
405   };
406 
407   std::string file_basename_;
408 
409   Header hdr_;
410 
411   IcingMMapper hdr_mmapper_;
412 
413   struct Crcs {
414     uint32_t all_crc;
415     uint32_t header_crc;
416     uint32_t array_crcs[NUM_ARRAY_TYPES];
417   };
418   Crcs *cached_crcs_;
419 
serialized_header_max()420   static uint32_t serialized_header_max() {
421     return IcingMMapper::system_page_size() - sizeof(Crcs);
422   }
423 
424   static Crc32 GetAllCrcs(const Crcs &crcs);
425 
426   RuntimeOptions runtime_options_;
427 
428   // Info kept about each array (NODE, NEXT, SUFFIX) to manage
429   // storage.
430   IcingScopedFd array_fds_[NUM_ARRAY_TYPES];
431   std::vector<IcingArrayStorage> array_storage_;
432 
433   // Legacy file system. Switch to use the new Filesystem class instead.
434   const IcingFilesystem *filesystem_;
435 };
436 
IcingDynamicTrieStorage(const std::string & file_basename,const RuntimeOptions & runtime_options,const IcingFilesystem * filesystem)437 IcingDynamicTrie::IcingDynamicTrieStorage::IcingDynamicTrieStorage(
438     const std::string &file_basename, const RuntimeOptions &runtime_options,
439     const IcingFilesystem *filesystem)
440     : file_basename_(file_basename),
441       hdr_mmapper_(false, MAP_SHARED),
442       cached_crcs_(nullptr),
443       runtime_options_(runtime_options),
444       array_storage_(NUM_ARRAY_TYPES, IcingArrayStorage(*filesystem)),
445       filesystem_(filesystem) {}
446 
~IcingDynamicTrieStorage()447 IcingDynamicTrie::IcingDynamicTrieStorage::~IcingDynamicTrieStorage() {
448   if (is_initialized()) {
449     for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
450       array_storage_[i].Reset();
451     }
452   }
453 }
454 
GetFilenames(const std::string & file_basename,std::vector<std::string> * filenames)455 void IcingDynamicTrie::IcingDynamicTrieStorage::GetFilenames(
456     const std::string &file_basename, std::vector<std::string> *filenames) {
457   const char *kArrayFilenameSuffixes[NUM_ARRAY_TYPES] = {
458       ".n",
459       ".x",
460       ".s",
461   };
462 
463   filenames->clear();
464   filenames->push_back(GetHeaderFilename(file_basename));
465   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
466     filenames->push_back(file_basename + kArrayFilenameSuffixes[i]);
467   }
468 }
469 
GetHeaderFilename(const std::string & file_basename)470 std::string IcingDynamicTrie::IcingDynamicTrieStorage::GetHeaderFilename(
471     const std::string &file_basename) {
472   constexpr char kHeaderFilenameSuffix[] = ".h";
473   return file_basename + kHeaderFilenameSuffix;
474 }
475 
Init()476 bool IcingDynamicTrie::IcingDynamicTrieStorage::Init() {
477   bool init_crcs = false;
478   const bool map_shared =
479       runtime_options_.storage_policy == RuntimeOptions::kMapSharedWithCrc;
480 
481   // Open files.
482   std::vector<std::string> filenames;
483   GetFilenames(file_basename_, &filenames);
484   for (size_t i = 0; i < filenames.size(); i++) {
485     uint64_t file_size = filesystem_->GetFileSize(filenames[i].c_str());
486     if (file_size == IcingFilesystem::kBadFileSize) {
487       goto failed;
488     }
489     IcingScopedFd sfd(filesystem_->OpenForWrite(filenames[i].c_str()));
490     if (!sfd.is_valid()) {
491       goto failed;
492     }
493     // The first filename is the header and the rest correspond to ArrayType
494     // enum values. The header's fd can be closed immediately after mmapping
495     // (see b/114830334). Other files' fds are tracked in array_fds_ for later
496     // closing.
497     if (i == 0) {
498       // Header.
499       if (file_size != IcingMMapper::system_page_size()) {
500         ICING_LOG(ERROR) << "Trie hdr wrong size: " << file_size;
501         goto failed;
502       }
503 
504       // Open hdr.
505       hdr_mmapper_.Remap(sfd.get(), 0, IcingMMapper::system_page_size());
506       if (!hdr_mmapper_.is_valid()) {
507         ICING_LOG(ERROR) << "Trie map header failed";
508         goto failed;
509       }
510     } else {
511       array_fds_[i - 1] = std::move(sfd);
512     }
513   }
514 
515   // Point crcs_ to correct region.
516   cached_crcs_ = reinterpret_cast<Crcs *>(hdr_mmapper_.address() +
517                                           serialized_header_max());
518   // Header hasn't been initialized yet. So we should check what's actually
519   // written to checksum the header.
520   if (cached_crcs_->header_crc == kNoCrc) {
521     // Create crcs.
522     cached_crcs_->header_crc = GetWrittenHeaderCrc().Get();
523 
524     // Do the same for the arrays.
525     init_crcs = true;
526   } else {
527     // Verify crc.
528     if (cached_crcs_->header_crc != GetWrittenHeaderCrc().Get()) {
529       ICING_LOG(ERROR) << "Trie header crc failed";
530       goto failed;
531     }
532   }
533 
534   // Deserialize and verify header.
535   if (!hdr_.Init(hdr_mmapper_.address(),
536                  IcingMMapper::system_page_size() - sizeof(Crcs)) ||
537       !hdr_.Verify()) {
538     ICING_LOG(ERROR) << "Trie reading header failed";
539     goto failed;
540   }
541 
542   // We have the header set up. Now read in the arrays.
543   if (!array_storage_[NODE].Init(array_fds_[NODE].get(), 0, map_shared,
544                                  sizeof(Node), hdr_.hdr.num_nodes(),
545                                  hdr_.hdr.max_nodes(),
546                                  &cached_crcs_->array_crcs[NODE], init_crcs)) {
547     ICING_LOG(ERROR) << "Trie mmap node failed";
548     goto failed;
549   }
550 
551   if (!array_storage_[NEXT].Init(array_fds_[NEXT].get(), 0, map_shared,
552                                  sizeof(Next), hdr_.hdr.num_nexts(),
553                                  hdr_.hdr.max_nexts(),
554                                  &cached_crcs_->array_crcs[NEXT], init_crcs)) {
555     ICING_LOG(ERROR) << "Trie mmap next failed";
556     goto failed;
557   }
558 
559   if (!array_storage_[SUFFIX].Init(
560           array_fds_[SUFFIX].get(), 0, map_shared, sizeof(char),
561           hdr_.hdr.suffixes_size(), hdr_.hdr.max_suffixes_size(),
562           &cached_crcs_->array_crcs[SUFFIX], init_crcs)) {
563     ICING_LOG(ERROR) << "Trie mmap suffix failed";
564     goto failed;
565   }
566 
567   // All of the cached crcs are now up to date. Either calculate+set the all crc
568   // or calculate+check the all crc.
569   if (init_crcs) {
570     cached_crcs_->all_crc = GetCachedCrc().Get();
571   } else {
572     // Verify crc.
573     if (cached_crcs_->all_crc != GetCachedCrc().Get()) {
574       ICING_LOG(ERROR) << "Trie all crc failed";
575       goto failed;
576     }
577   }
578 
579   return true;
580 
581 failed:
582   cached_crcs_ = nullptr;
583   hdr_mmapper_.Unmap();
584   hdr_.Invalidate();
585   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
586     array_storage_[i].Reset();
587     array_fds_[i].reset();
588   }
589 
590   return false;
591 }
592 
CreateIfNotExist(const Options & options)593 bool IcingDynamicTrie::IcingDynamicTrieStorage::CreateIfNotExist(
594     const Options &options) {
595   std::vector<std::string> filenames;
596   GetFilenames(file_basename_, &filenames);
597 
598   // Check already exists. Just header file check is enough.
599   if (filesystem_->FileExists(filenames[0].c_str())) {
600     return true;
601   }
602 
603   // Ensure the storage directory exists
604   std::string storage_dir = filesystem_->GetDirname(filenames[0].c_str());
605   if (!filesystem_->CreateDirectoryRecursively(storage_dir.c_str())) {
606     return false;
607   }
608 
609   // Create files.
610   for (size_t i = 0; i < filenames.size(); i++) {
611     IcingScopedFd sfd(filesystem_->OpenForWrite(filenames[i].c_str()));
612     if (!sfd.is_valid()) {
613       Remove(file_basename_, *filesystem_);
614       return false;
615     }
616 
617     if (i == 0) {
618       if (!CreateNewHeader(std::move(sfd), options)) {
619         ICING_LOG(ERROR) << "Serialize trie header failed";
620         Remove(file_basename_, *filesystem_);
621         return false;
622       }
623     } else {
624       // Crcs are automatically kNoCrc so they will be initialized
625       // upon first call to Init.
626       if (!filesystem_->Truncate(*sfd, 0)) {
627         Remove(file_basename_, *filesystem_);
628         return false;
629       }
630     }
631   }
632   return true;
633 }
634 
CreateNewHeader(IcingScopedFd sfd,const Options & options)635 bool IcingDynamicTrie::IcingDynamicTrieStorage::CreateNewHeader(
636     IcingScopedFd sfd, const Options &options) {
637   ICING_VLOG(1) << "Creating header with write+sync";
638   hdr_.Init(options);
639   auto buf = std::make_unique<uint8_t[]>(IcingMMapper::system_page_size());
640   // serialized_header_max must be less than system_page_size so we don't
641   // overflow buf when serializing the header.
642   if (serialized_header_max() > IcingMMapper::system_page_size()) {
643     ICING_LOG(FATAL) << "serialized_header_max exceeds system page size";
644   }
645 
646   return hdr_.SerializeToArray(buf.get(), serialized_header_max()) &&
647          filesystem_->Write(sfd.get(), buf.get(),
648                             IcingMMapper::system_page_size()) &&
649          filesystem_->DataSync(sfd.get());
650 }
651 
Remove(const std::string & file_basename,const IcingFilesystem & filesystem)652 bool IcingDynamicTrie::IcingDynamicTrieStorage::Remove(
653     const std::string &file_basename, const IcingFilesystem &filesystem) {
654   bool success = true;
655   std::vector<std::string> files;
656   GetFilenames(file_basename, &files);
657   for (size_t i = 0; i < files.size(); i++) {
658     if (!filesystem.DeleteFile(files[i].c_str())) {
659       success = false;
660     }
661   }
662   return success;
663 }
664 
Warm()665 void IcingDynamicTrie::IcingDynamicTrieStorage::Warm() {
666   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
667     array_storage_[i].Warm();
668   }
669 }
670 
Clear()671 void IcingDynamicTrie::IcingDynamicTrieStorage::Clear() {
672   if (!is_initialized()) {
673     ICING_LOG(FATAL) << "DynamicTrie not initialized";
674   }
675 
676   // Clear header.
677   hdr_.hdr.set_num_nodes(0);
678   hdr_.hdr.set_num_nexts(0);
679   hdr_.hdr.set_suffixes_size(0);
680   for (int i = 0; i < hdr_.hdr.free_lists_size(); i++) {
681     hdr_.hdr.set_free_lists(i, kInvalidNextIndex);
682   }
683   hdr_.hdr.set_num_keys(0);
684 
685   // Clear array storage.
686   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
687     array_storage_[i].Clear();
688   }
689 
690   // Copy to persistence.
691   WriteHeader();
692 }
693 
Sync()694 bool IcingDynamicTrie::IcingDynamicTrieStorage::Sync() {
695   if (!is_initialized()) {
696     ICING_LOG(FATAL) << "DynamicTrie not initialized";
697   }
698 
699   uint32_t total_flushed = 0;
700   bool success = true;
701 
702   // Sync all array types.
703   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
704     total_flushed += array_storage_[i].Sync();
705     if (!filesystem_->DataSync(array_fds_[i].get())) {
706       ICING_LOG(ERROR) << "Unable to sync data for flushing";
707       success = false;
708     }
709   }
710 
711   if (!WriteHeader()) {
712     ICING_LOG(ERROR) << "Flushing trie header failed: " << strerror(errno);
713     success = false;
714   }
715 
716   // Need to update CRCs before we sync the header mmap.
717   UpdateCrcInternal(false);
718 
719   // Sync header.
720   if (!hdr_mmapper_.Sync()) {
721     ICING_LOG(ERROR) << "Unable to sync trie header for flushing";
722     success = false;
723   }
724 
725   if (total_flushed > 0) {
726     ICING_VLOG(1) << "Flushing " << total_flushed << " pages of trie";
727   }
728 
729   return success;
730 }
731 
GetDiskUsage() const732 uint64_t IcingDynamicTrie::IcingDynamicTrieStorage::GetDiskUsage() const {
733   // Trie files themselves.
734   uint64_t total = 0;
735   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
736     IcingFilesystem::IncrementByOrSetInvalid(
737         filesystem_->GetDiskUsage(array_fds_[i].get()), &total);
738   }
739 
740   // Header.
741   std::string header_filename = GetHeaderFilename(file_basename_);
742   IcingFilesystem::IncrementByOrSetInvalid(
743       filesystem_->GetFileDiskUsage(header_filename.c_str()), &total);
744 
745   return total;
746 }
747 
GetElementsFileSize() const748 uint64_t IcingDynamicTrie::IcingDynamicTrieStorage::GetElementsFileSize()
749     const {
750   // Trie files themselves, exclude size of the header. These arrays are dense,
751   // not sparse, so use file size for more accurate numbers.
752   uint64_t total = 0;
753   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
754     IcingFilesystem::IncrementByOrSetInvalid(
755         filesystem_->GetFileSize(array_fds_[i].get()), &total);
756   }
757   return total;
758 }
759 
AllocNode()760 IcingDynamicTrie::Node *IcingDynamicTrie::IcingDynamicTrieStorage::AllocNode() {
761   if (nodes_left() == 0) {
762     ICING_LOG(FATAL) << "No allocated nodes left";
763   }
764 
765   hdr_.hdr.set_num_nodes(hdr_.hdr.num_nodes() + 1);
766   return GetMutableNode(hdr_.hdr.num_nodes() - 1);
767 }
768 
769 libtextclassifier3::StatusOr<IcingDynamicTrie::Next *>
AllocNextArray(int size)770 IcingDynamicTrie::IcingDynamicTrieStorage::AllocNextArray(int size) {
771   if (size > kMaxNextArraySize) {
772     return absl_ports::InternalError(
773         "Array size exceeds the max 'next' array size");
774   }
775 
776   if (nexts_left() < static_cast<uint32_t>(kMaxNextArraySize)) {
777     ICING_LOG(FATAL) << "'next' buffer not enough";
778   }
779 
780   // Compute ceil(log2(size)).
781   int log2_size = 0;
782   while ((1 << log2_size) < size) log2_size++;
783   // Note: size <= aligned_size <= kMaxNextArraySize
784   int aligned_size = 1 << log2_size;
785 
786   // Look in free list.
787   Next *ret;
788   if (hdr_.hdr.free_lists(log2_size) != kInvalidNextIndex) {
789     ret = GetMutableNextArray(hdr_.hdr.free_lists(log2_size), aligned_size);
790     uint32_t next_link = ret->next_index();
791     if (next_link != kInvalidNextIndex && next_link >= hdr_.hdr.max_nexts()) {
792       ICING_LOG(FATAL) << "'next' index is out of range";
793     }
794     hdr_.hdr.set_free_lists(log2_size, next_link);
795   } else {
796     // Allocate a new one.
797     ret = GetMutableNextArray(hdr_.hdr.num_nexts(), aligned_size);
798     hdr_.hdr.set_num_nexts(hdr_.hdr.num_nexts() + aligned_size);
799   }
800 
801   // Fill with char 0xff so we are sorted properly.
802   for (int i = 0; i < aligned_size; i++) {
803     ResetMutableNext(ret[i]);
804   }
805   return ret;
806 }
807 
FreeNextArray(Next * next,int log2_size)808 void IcingDynamicTrie::IcingDynamicTrieStorage::FreeNextArray(Next *next,
809                                                               int log2_size) {
810   if (GetNextArrayIndex(next) + (1 << log2_size) > hdr_.hdr.max_nexts()) {
811     ICING_LOG(FATAL) << "'next' array is out of range";
812   }
813 
814   // Put it in free list.
815   next->set_next_index(hdr_.hdr.free_lists(log2_size));
816   hdr_.hdr.set_free_lists(log2_size, GetNextArrayIndex(next));
817 }
818 
MakeSuffix(std::string_view suffix,const void * value,uint32_t * value_index)819 uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::MakeSuffix(
820     std::string_view suffix, const void *value, uint32_t *value_index) {
821   if (suffixes_left() < suffix.size() + 1 + value_size()) {
822     ICING_LOG(FATAL) << "'suffix' buffer not enough";
823   }
824 
825   char *start = GetMutableSuffix(hdr_.hdr.suffixes_size(),
826                                  suffix.size() + 1 + value_size());
827   // Copy suffix.
828   memcpy(start, suffix.data(), suffix.size());
829   // Set a '\0' after suffix.
830   memset(start + suffix.size(), /*value=*/0, /*num=*/1);
831   // Copy value.
832   memcpy(start + suffix.size() + 1, value, value_size());
833   if (value_index) *value_index = GetSuffixIndex(start + suffix.size() + 1);
834   hdr_.hdr.set_suffixes_size(hdr_.hdr.suffixes_size() + suffix.size() + 1 +
835                              value_size());
836 
837   return GetSuffixIndex(start);
838 }
839 
GetWrittenHeaderCrc() const840 Crc32 IcingDynamicTrie::IcingDynamicTrieStorage::GetWrittenHeaderCrc() const {
841   std::string_view data(reinterpret_cast<const char *>(hdr_mmapper_.address()),
842                         serialized_header_max());
843   return Crc32(data);
844 }
845 
GetHeaderCrc() const846 Crc32 IcingDynamicTrie::IcingDynamicTrieStorage::GetHeaderCrc() const {
847   // Create a buffer that is the same as the mmapped header.
848   auto hdr_data = std::make_unique<uint8_t[]>(serialized_header_max());
849   std::memcpy(hdr_data.get(), hdr_mmapper_.address(), serialized_header_max());
850 
851   // Serialize the in-memory header to the buffer and then checksum it.
852   hdr_.SerializeToArray(hdr_data.get(), serialized_header_max());
853   std::string_view data(reinterpret_cast<const char *>(hdr_data.get()),
854                         serialized_header_max());
855   return Crc32(data);
856 }
857 
GetCachedCrc() const858 Crc32 IcingDynamicTrie::IcingDynamicTrieStorage::GetCachedCrc() const {
859   return GetAllCrcs(*cached_crcs_);
860 }
861 
GetAllCrcs(const IcingDynamicTrie::IcingDynamicTrieStorage::Crcs & crcs)862 /*static*/ Crc32 IcingDynamicTrie::IcingDynamicTrieStorage::GetAllCrcs(
863     const IcingDynamicTrie::IcingDynamicTrieStorage::Crcs &crcs) {
864   // Append array crcs to header crc.
865   Crc32 crc(crcs.header_crc);
866   std::string_view data(reinterpret_cast<const char *>(crcs.array_crcs),
867                         sizeof(cached_crcs_->array_crcs));
868   crc.Append(data);
869   return crc;
870 }
871 
GetCrc() const872 Crc32 IcingDynamicTrie::IcingDynamicTrieStorage::GetCrc() const {
873   // crcs_ holds cached values. We want the crcs that represent the content *as
874   // it exists now*.
875   Crcs crcs;
876   crcs.header_crc = GetHeaderCrc().Get();
877   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
878     crcs.array_crcs[i] = array_storage_[i].GetCrc().Get();
879   }
880   return GetAllCrcs(crcs);
881 }
882 
UpdateCrc()883 Crc32 IcingDynamicTrie::IcingDynamicTrieStorage::UpdateCrc() {
884   return UpdateCrcInternal(true);
885 }
886 
UpdateCrcInternal(bool write_hdr)887 Crc32 IcingDynamicTrie::IcingDynamicTrieStorage::UpdateCrcInternal(
888     bool write_hdr) {
889   if (write_hdr && !WriteHeader()) {
890     ICING_LOG(ERROR) << "Flushing trie header failed: " << strerror(errno);
891   }
892 
893   // Since we just wrote the header, GetHeaderCrc() and GetWrittenHeaderCrc()
894   // are equivalent. We call GetWrittenHeaderCrc() because it avoids serializing
895   // the header to recalculate the crc.
896   cached_crcs_->header_crc = GetWrittenHeaderCrc().Get();
897   for (int i = 0; i < NUM_ARRAY_TYPES; i++) {
898     array_storage_[i].UpdateCrc();
899   }
900 
901   // All of the cached crcs are now up to date.
902   Crc32 all_crc = GetAllCrcs(*cached_crcs_);
903   cached_crcs_->all_crc = all_crc.Get();
904   return all_crc;
905 }
906 
WriteHeader()907 bool IcingDynamicTrie::IcingDynamicTrieStorage::WriteHeader() {
908   return hdr_.SerializeToArray(hdr_mmapper_.address(), serialized_header_max());
909 }
910 
911 IcingDynamicTrie::Node *
GetMutableNode(uint32_t idx)912 IcingDynamicTrie::IcingDynamicTrieStorage::GetMutableNode(uint32_t idx) {
913   return array_storage_[NODE].GetMutableMem<Node>(idx, 1);
914 }
915 
916 IcingDynamicTrie::Next *
GetMutableNextArray(uint32_t idx,uint32_t len)917 IcingDynamicTrie::IcingDynamicTrieStorage::GetMutableNextArray(uint32_t idx,
918                                                                uint32_t len) {
919   return array_storage_[NEXT].GetMutableMem<Next>(idx, len);
920 }
921 
GetMutableSuffix(uint32_t idx,uint32_t len)922 char *IcingDynamicTrie::IcingDynamicTrieStorage::GetMutableSuffix(
923     uint32_t idx, uint32_t len) {
924   return array_storage_[SUFFIX].GetMutableMem<char>(idx, len);
925 }
926 
927 // Header functions.
928 const uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::Header::kMagic =
929     0x6dfba6ae;
930 // For future revisions, this should be synced with global index version.
931 // See comments on Upgrade() in native-index-impl.h for versioning.
932 const uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::Header::kCurVersion =
933     4;
934 
Init(const IcingDynamicTrie::Options & options)935 void IcingDynamicTrie::IcingDynamicTrieStorage::Header::Init(
936     const IcingDynamicTrie::Options &options) {
937   hdr.Clear();
938 
939   hdr.set_version(kCurVersion);
940   hdr.set_max_nodes(options.max_nodes);
941   hdr.set_max_nexts(options.max_nexts);
942   hdr.set_max_suffixes_size(options.max_suffixes_size);
943   hdr.set_value_size(options.value_size);
944 
945   for (int i = 0; i < kNumNextAllocationBuckets; i++) {
946     hdr.add_free_lists(kInvalidNextIndex);
947   }
948 }
949 
Init(const uint8_t * buf,uint32_t buf_size)950 bool IcingDynamicTrie::IcingDynamicTrieStorage::Header::Init(
951     const uint8_t *buf, uint32_t buf_size) {
952   // Check magic and length.
953   if (buf_size <= sizeof(kMagic) + sizeof(uint32_t)) {
954     ICING_LOG(ERROR) << "Trie header too short";
955     return false;
956   }
957 
958   uint32_t magic;
959   memcpy(&magic, buf, sizeof(magic));
960   if (magic != kMagic) {
961     ICING_LOG(ERROR) << "Trie header magic mismatch";
962     return false;
963   }
964   uint32_t len;
965   memcpy(&len, buf + sizeof(magic), sizeof(len));
966   if (len > buf_size - sizeof(magic) - sizeof(len)) {
967     ICING_LOG(ERROR) << "Trie header too short";
968     return false;
969   }
970 
971   return hdr.ParseFromArray(buf + sizeof(magic) + sizeof(len), len);
972 }
973 
SerializeToArray(uint8_t * buf,uint32_t buf_size) const974 bool IcingDynamicTrie::IcingDynamicTrieStorage::Header::SerializeToArray(
975     uint8_t *buf, uint32_t buf_size) const {
976   uint32_t size = hdr.ByteSizeLong();
977   if (size + sizeof(kMagic) + sizeof(uint32_t) > buf_size) return false;
978   memcpy(buf, &kMagic, sizeof(kMagic));
979   memcpy(buf + sizeof(kMagic), &size, sizeof(uint32_t));
980   hdr.SerializeWithCachedSizesToArray(buf + sizeof(kMagic) + sizeof(uint32_t));
981   return true;
982 }
983 
Verify()984 bool IcingDynamicTrie::IcingDynamicTrieStorage::Header::Verify() {
985   // Check version.
986   if (hdr.version() != kCurVersion) {
987     ICING_LOG(ERROR) << "Trie version " << hdr.version() << " mismatch";
988     return false;
989   }
990 
991   // Check that indices in hdr are within bounds. Note that this is
992   // not a comprehensive integrity check for the entire trie.
993   if (hdr.num_nodes() > hdr.max_nodes() || hdr.num_nexts() > hdr.max_nexts() ||
994       hdr.suffixes_size() > hdr.max_suffixes_size() ||
995       hdr.value_size() >= hdr.max_suffixes_size()) {
996     ICING_LOG(ERROR) << "Trie header array size out of bounds";
997     return false;
998   }
999 
1000   if (hdr.free_lists_size() != kNumNextAllocationBuckets) {
1001     ICING_LOG(ERROR) << "Bad number of free lists";
1002     return false;
1003   }
1004 
1005   for (int i = 0; i < kNumNextAllocationBuckets; i++) {
1006     if (hdr.free_lists(i) != kInvalidNextIndex &&
1007         hdr.free_lists(i) >= hdr.max_nexts()) {
1008       ICING_LOG(ERROR) << "Free list index out of bounds";
1009       return false;
1010     }
1011   }
1012 
1013   return true;
1014 }
1015 
nodes_left() const1016 uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::nodes_left() const {
1017   return hdr_.hdr.max_nodes() - hdr_.hdr.num_nodes();
1018 }
1019 
nexts_left() const1020 uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::nexts_left() const {
1021   return hdr_.hdr.max_nexts() - hdr_.hdr.num_nexts();
1022 }
1023 
suffixes_left() const1024 uint32_t IcingDynamicTrie::IcingDynamicTrieStorage::suffixes_left() const {
1025   return hdr_.hdr.max_suffixes_size() - hdr_.hdr.suffixes_size();
1026 }
1027 
FillDirtyPageStats(Stats * stats) const1028 void IcingDynamicTrie::IcingDynamicTrieStorage::FillDirtyPageStats(
1029     Stats *stats) const {
1030   stats->dirty_pages_nodes = array_storage_[NODE].num_dirty_pages();
1031   stats->dirty_pages_nexts = array_storage_[NEXT].num_dirty_pages();
1032   stats->dirty_pages_suffixes = array_storage_[SUFFIX].num_dirty_pages();
1033 }
1034 
1035 // Dumper.
1036 class IcingDynamicTrie::Dumper {
1037  public:
Dumper(const IcingDynamicTrie & trie)1038   explicit Dumper(const IcingDynamicTrie &trie)
1039       : all_props_(trie), del_prop_(trie), storage_(trie.storage_.get()) {}
1040 
Dump(std::ostream * pretty_print,std::vector<std::string> * keys) const1041   void Dump(std::ostream *pretty_print, std::vector<std::string> *keys) const {
1042     if (storage_->empty()) {
1043       *pretty_print << "(empty)\n";
1044     } else {
1045       DumpNodeRecursive("", *storage_->GetRootNode(), 0, pretty_print, keys);
1046     }
1047   }
1048 
1049  private:
SuffixToValueAsString(const char * suffix) const1050   std::string SuffixToValueAsString(const char *suffix) const {
1051     int suffix_len = strlen(suffix);
1052     std::string ret;
1053     ret.reserve(storage_->value_size() * 2);
1054     for (uint32_t i = 0; i < storage_->value_size(); i++) {
1055       IcingStringUtil::SStringAppendF(&ret, 10, "%02x",
1056                                       suffix[suffix_len + 1 + i]);
1057     }
1058 
1059     // Now dump set properties.
1060     uint32_t value_index = storage_->GetSuffixIndex(suffix + suffix_len + 1);
1061     if (del_prop_.HasProperty(value_index)) {
1062       ret += " (deleted)";
1063     }
1064     ret += " [";
1065     for (size_t i = 0; i < all_props_.size(); i++) {
1066       if (all_props_.HasProperty(i, value_index)) {
1067         IcingStringUtil::SStringAppendF(&ret, 10, "%zu", i);
1068       }
1069     }
1070     ret += ']';
1071 
1072     return ret;
1073   }
1074 
1075   // Inputs:
1076   //   prefix - the key prefix of the current node (so we can rebuild the key)
1077   //   node - the node we're at
1078   //   level - how many levels deep we are in the trie
1079   //   ret - the stream to pretty print to
1080   //   keys - the keys encountered are appended to this
1081   //
1082   // REQUIRES: node is valid.
1083   //   - Since we only invalidate Next to remove the edge from the trie and Node
1084   //     is not invalidated after deletion, the caller should ensure that it
1085   //     traverses correctly to a valid node according to the trie structure.
1086   //     Calling this function with an invalid node is undefined behavior since
1087   //     it could traverse into a deleted subtree, or invalid memory addresses.
1088   //   - This also means storage_->empty() should be checked before calling this
1089   //     function with the root node.
DumpNodeRecursive(const std::string & prefix,const Node & node,int level,std::ostream * ret,std::vector<std::string> * keys) const1090   void DumpNodeRecursive(const std::string &prefix, const Node &node, int level,
1091                          std::ostream *ret,
1092                          std::vector<std::string> *keys) const {
1093     // This function should be called only if the node is valid. The first call
1094     // is always from the root node, so it means the trie should not be empty at
1095     // this moment. Otherwise (root) node could contain invalid next_index(),
1096     // is_leaf(), and log2_num_children().
1097     if (node.is_leaf()) {
1098       // Dump suffix and value.
1099       for (int i = 0; i < level; i++) {
1100         *ret << ' ';
1101       }
1102       const char *suffix = storage_->GetSuffix(node.next_index());
1103       *ret << suffix;
1104       *ret << ' ';
1105       *ret << SuffixToValueAsString(suffix);
1106       *ret << '\n';
1107       keys->push_back(prefix + suffix);
1108     } else {
1109       // Go through each child (next) node. Print char and recursively
1110       // print trie underneath.
1111       for (uint32_t i = 0; i < (1U << node.log2_num_children()); i++) {
1112         const Next &next = *storage_->GetNext(node.next_index(), i);
1113         if (next.node_index() == kInvalidNodeIndex) break;
1114         for (int j = 0; j < level; j++) {
1115           *ret << ' ';
1116         }
1117         std::string new_prefix = prefix;
1118         if (next.val()) {
1119           *ret << static_cast<char>(next.val());
1120           new_prefix += next.val();
1121         } else {
1122           *ret << "null";
1123         }
1124         *ret << '\n';
1125         DumpNodeRecursive(new_prefix, *storage_->GetNode(next.node_index()),
1126                           level + 1, ret, keys);
1127       }
1128     }
1129   }
1130 
1131   PropertyReadersAll all_props_;
1132   PropertyDeletedReader del_prop_;
1133   const IcingDynamicTrie::IcingDynamicTrieStorage *storage_;
1134 };
1135 
1136 // IcingDynamicTrie.
IcingDynamicTrie(const std::string & filename_base,const RuntimeOptions & runtime_options,const IcingFilesystem * filesystem)1137 IcingDynamicTrie::IcingDynamicTrie(const std::string &filename_base,
1138                                    const RuntimeOptions &runtime_options,
1139                                    const IcingFilesystem *filesystem)
1140     : IIcingStorage(),
1141       filename_base_(filename_base),
1142       is_initialized_(false),
1143       runtime_options_(runtime_options),
1144       storage_(nullptr),
1145       property_bitmaps_prefix_(filename_base_ + ".prop."),
1146       deleted_bitmap_filename_(filename_base_ + ".deleted"),
1147       deleted_bitmap_(nullptr),
1148       filesystem_(filesystem) {}
1149 
~IcingDynamicTrie()1150 IcingDynamicTrie::~IcingDynamicTrie() { Close(); }
1151 
Init()1152 bool IcingDynamicTrie::Init() {
1153   if (is_initialized_) return true;
1154 
1155   if (storage_ != nullptr) {
1156     ICING_LOG(FATAL) << "Storage is not null before initialization";
1157   }
1158 
1159   storage_ = std::make_unique<IcingDynamicTrieStorage>(
1160       filename_base_, runtime_options_, filesystem_);
1161   if (!storage_->Init() || !InitPropertyBitmaps()) {
1162     storage_.reset();
1163     return false;
1164   }
1165   is_initialized_ = true;
1166   return true;
1167 }
1168 
CreateIfNotExist(const Options & options)1169 bool IcingDynamicTrie::CreateIfNotExist(const Options &options) {
1170   // Initialized means exists.
1171   if (is_initialized_) return true;
1172 
1173   if (!options.is_valid()) {
1174     ICING_LOG(ERROR) << "Trie options invalid";
1175     return false;
1176   }
1177 
1178   auto storage = std::make_unique<IcingDynamicTrieStorage>(
1179       filename_base_, runtime_options_, filesystem_);
1180   return storage->CreateIfNotExist(options);
1181 }
1182 
Close()1183 void IcingDynamicTrie::Close() {
1184   if (!is_initialized_) return;
1185 
1186   UpdateCrc();
1187 
1188   storage_.reset();
1189   property_bitmaps_.clear();
1190   deleted_bitmap_.reset();
1191   is_initialized_ = false;
1192 }
1193 
Remove()1194 bool IcingDynamicTrie::Remove() {
1195   if (is_initialized()) {
1196     Close();
1197   }
1198 
1199   bool success = true;
1200 
1201   // Remove storage files.
1202   if (!IcingDynamicTrieStorage::Remove(filename_base_, *filesystem_)) {
1203     success = false;
1204   }
1205 
1206   // Also remove property bitmaps.
1207   std::vector<std::string> files;
1208   if (!filesystem_->GetMatchingFiles((property_bitmaps_prefix_ + "*").c_str(),
1209                                      &files)) {
1210     return false;
1211   }
1212   for (size_t i = 0; i < files.size(); i++) {
1213     if (!filesystem_->DeleteFile(files[i].c_str())) success = false;
1214   }
1215   // And deleted bitmap.
1216   if (!filesystem_->DeleteFile(deleted_bitmap_filename_.c_str()))
1217     success = false;
1218 
1219   return success;
1220 }
1221 
Sync()1222 bool IcingDynamicTrie::Sync() {
1223   if (!is_initialized_) {
1224     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1225   }
1226 
1227   bool success = true;
1228   IcingTimer timer;
1229 
1230   // Sync property bitmaps.
1231   for (size_t i = 0; i < property_bitmaps_.size(); i++) {
1232     if (property_bitmaps_[i]) {
1233       if (!property_bitmaps_[i]->Sync()) success = false;
1234     }
1235   }
1236   if (!deleted_bitmap_->Sync()) success = false;
1237 
1238   // Sync storage.
1239   if (!storage_->Sync()) success = false;
1240 
1241   Warm();
1242 
1243   ICING_VLOG(1) << "Syncing dynamic trie " << filename_base_.c_str() << " took "
1244                 << timer.Elapsed() * 1000 << "ms";
1245 
1246   return success;
1247 }
1248 
GetDiskUsage() const1249 uint64_t IcingDynamicTrie::GetDiskUsage() const {
1250   uint64_t total = 0;
1251   // Property bitmaps.
1252   IcingFilesystem::IncrementByOrSetInvalid(deleted_bitmap_->GetDiskUsage(),
1253                                            &total);
1254 
1255   for (auto &bitmap : property_bitmaps_) {
1256     if (bitmap == nullptr) continue;
1257     IcingFilesystem::IncrementByOrSetInvalid(bitmap->GetDiskUsage(), &total);
1258   }
1259 
1260   // Storage.
1261   IcingFilesystem::IncrementByOrSetInvalid(storage_->GetDiskUsage(), &total);
1262   return total;
1263 }
1264 
GetElementsSize() const1265 uint64_t IcingDynamicTrie::GetElementsSize() const {
1266   uint64_t total = 0;
1267 
1268   // Bitmaps are sparsely populated, so disk usage is more accurate for those.
1269   // Property bitmaps.
1270   IcingFilesystem::IncrementByOrSetInvalid(deleted_bitmap_->GetDiskUsage(),
1271                                            &total);
1272   // The deleted bitmap is always initially grown to kGrowSize, whether there
1273   // are elements or not. So even if there are no elements in the trie, we'll
1274   // still have the bitmap of size kGrowSize, so subtract that from the size of
1275   // the trie's elements.
1276   total -= IcingFlashBitmap::kGrowSize;
1277 
1278   for (auto &bitmap : property_bitmaps_) {
1279     if (bitmap == nullptr) continue;
1280     IcingFilesystem::IncrementByOrSetInvalid(bitmap->GetDiskUsage(), &total);
1281   }
1282 
1283   // Storage. We can use file size here since the storage files aren't sparse.
1284   IcingFilesystem::IncrementByOrSetInvalid(storage_->GetElementsFileSize(),
1285                                            &total);
1286   return total;
1287 }
1288 
OpenAndInitBitmap(const std::string & filename,bool verify,const IcingFilesystem * filesystem)1289 std::unique_ptr<IcingFlashBitmap> IcingDynamicTrie::OpenAndInitBitmap(
1290     const std::string &filename, bool verify,
1291     const IcingFilesystem *filesystem) {
1292   auto bitmap = std::make_unique<IcingFlashBitmap>(filename, filesystem);
1293   if (!bitmap->Init() || (verify && !bitmap->Verify())) {
1294     ICING_LOG(ERROR) << "Init of " << filename.c_str() << " failed";
1295     return nullptr;
1296   }
1297   return bitmap;
1298 }
1299 
InitPropertyBitmaps()1300 bool IcingDynamicTrie::InitPropertyBitmaps() {
1301   // Only called on init.
1302   if (!property_bitmaps_.empty()) {
1303     ICING_LOG(FATAL) << "Property bitmaps not empty before initialization";
1304   }
1305 
1306   if (deleted_bitmap_ != nullptr) {
1307     ICING_LOG(FATAL) << "Deleted bitmap not null before initialization";
1308   }
1309 
1310   // Truncate property bitmap files at current value index. Last value
1311   // is at suffixes_size - value_size(). We want to clear everything
1312   // after that.
1313   uint64_t truncate_idx =
1314       storage_->hdr().suffixes_size() > 0
1315           ? ValueIndexToPropertyBitmapIndex(storage_->hdr().suffixes_size() -
1316                                             value_size()) +
1317                 1
1318           : 0;
1319 
1320   // Discover property bitmaps by scanning the dir.
1321   std::vector<std::string> files;
1322   if (!filesystem_->GetMatchingFiles((property_bitmaps_prefix_ + "*").c_str(),
1323                                      &files)) {
1324     ICING_LOG(ERROR) << "Could not get files at prefix "
1325                      << property_bitmaps_prefix_;
1326     goto failed;
1327   }
1328   for (size_t i = 0; i < files.size(); i++) {
1329     // Decode property id from filename.
1330     size_t property_id_start_idx = files[i].rfind('.');
1331     if (property_id_start_idx == std::string::npos) {
1332       ICING_LOG(ERROR) << "Malformed filename " << files[i];
1333       continue;
1334     }
1335     property_id_start_idx++;  // skip dot
1336     char *end;
1337     uint32_t property_id =
1338         strtol(files[i].c_str() + property_id_start_idx, &end, 10);  // NOLINT
1339     if (!end || end != (files[i].c_str() + files[i].size())) {
1340       ICING_LOG(ERROR) << "Malformed filename " << files[i];
1341       continue;
1342     }
1343     std::unique_ptr<IcingFlashBitmap> bitmap = OpenAndInitBitmap(
1344         files[i],
1345         runtime_options_.storage_policy == RuntimeOptions::kMapSharedWithCrc,
1346         filesystem_);
1347     if (!bitmap) {
1348       ICING_LOG(ERROR) << "Open prop bitmap failed: " << files[i];
1349       goto failed;
1350     }
1351     bitmap->Truncate(truncate_idx);
1352     if (property_id >= property_bitmaps_.size()) {
1353       property_bitmaps_.resize(property_id + 1);
1354     }
1355     property_bitmaps_[property_id] = std::move(bitmap);
1356   }
1357 
1358   deleted_bitmap_ = OpenAndInitBitmap(
1359       deleted_bitmap_filename_,
1360       runtime_options_.storage_policy == RuntimeOptions::kMapSharedWithCrc,
1361       filesystem_);
1362   if (!deleted_bitmap_) {
1363     goto failed;
1364   }
1365   deleted_bitmap_->Truncate(truncate_idx);
1366 
1367   return true;
1368 
1369 failed:
1370   property_bitmaps_.clear();
1371   deleted_bitmap_.reset();
1372   return false;
1373 }
1374 
Warm() const1375 void IcingDynamicTrie::Warm() const {
1376   if (!is_initialized()) {
1377     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1378   }
1379 
1380   return storage_->Warm();
1381 }
1382 
size() const1383 uint32_t IcingDynamicTrie::size() const {
1384   if (!is_initialized()) {
1385     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1386   }
1387   return storage_->hdr().num_keys();
1388 }
1389 
empty() const1390 bool IcingDynamicTrie::empty() const {
1391   if (!is_initialized()) {
1392     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1393   }
1394   return storage_->empty();
1395 }
1396 
CollectStatsRecursive(const Node & node,Stats * stats,uint32_t depth) const1397 void IcingDynamicTrie::CollectStatsRecursive(const Node &node, Stats *stats,
1398                                              uint32_t depth) const {
1399   // This function should be called only if the node is valid. The first call is
1400   // always from the root node, so it means the trie should not be empty at this
1401   // moment. Otherwise (root) node could contain invalid next_index(),
1402   // is_leaf(), and log2_num_children().
1403   if (node.is_leaf()) {
1404     stats->num_leaves++;
1405     stats->sum_depth += depth;
1406     stats->max_depth = std::max(stats->max_depth, depth);
1407     const char *suffix = storage_->GetSuffix(node.next_index());
1408     stats->suffixes_used += strlen(suffix) + 1 + value_size();
1409     if (!suffix[0]) {
1410       stats->null_suffixes++;
1411     }
1412   } else {
1413     stats->num_intermediates++;
1414     uint32_t i = 0;
1415     for (; i < (1U << node.log2_num_children()); i++) {
1416       const Next &next = *storage_->GetNext(node.next_index(), i);
1417       if (next.node_index() == kInvalidNodeIndex) break;
1418       CollectStatsRecursive(*storage_->GetNode(next.node_index()), stats,
1419                             depth + 1);
1420     }
1421 
1422     // At least one valid node in each next array
1423     if (i == 0) {
1424       ICING_LOG(FATAL) << "No valid node in 'next' array";
1425     }
1426     stats->sum_children += i;
1427     stats->max_children = std::max(stats->max_children, i);
1428 
1429     stats->child_counts[i - 1]++;
1430     stats->wasted[node.log2_num_children()] +=
1431         (1 << node.log2_num_children()) - i;
1432     stats->total_wasted += (1 << node.log2_num_children()) - i;
1433   }
1434 }
1435 
CollectStats(Stats * stats) const1436 void IcingDynamicTrie::CollectStats(Stats *stats) const {
1437   if (!is_initialized()) {
1438     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1439   }
1440 
1441   memset(stats, 0, sizeof(*stats));
1442 
1443   stats->num_keys = storage_->hdr().num_keys();
1444   stats->num_nodes = storage_->hdr().num_nodes();
1445   stats->max_nodes = storage_->hdr().max_nodes();
1446   stats->num_nexts = storage_->hdr().num_nexts();
1447   stats->max_nexts = storage_->hdr().max_nexts();
1448   stats->suffixes_size = storage_->hdr().suffixes_size();
1449   stats->max_suffixes_size = storage_->hdr().max_suffixes_size();
1450 
1451   // Stats collected from traversing the trie.
1452   if (!storage_->empty()) {
1453     CollectStatsRecursive(*storage_->GetRootNode(), stats);
1454   }
1455 
1456   // Free-list stats.
1457   for (int i = 0; i < kNumNextAllocationBuckets; i++) {
1458     for (uint32_t cur = storage_->hdr().free_lists(i); cur != kInvalidNextIndex;
1459          cur = storage_->GetNext(cur, 0)->next_index()) {
1460       stats->num_free[i]++;
1461     }
1462     stats->total_free += stats->num_free[i] * (1 << i);
1463   }
1464 
1465   // Dirty page counts.
1466   storage_->FillDirtyPageStats(stats);
1467 }
1468 
DumpStats(int verbosity) const1469 std::string IcingDynamicTrie::Stats::DumpStats(int verbosity) const {
1470   std::string ret;
1471   IcingStringUtil::SStringAppendF(
1472       &ret, 0,
1473       "Keys %u "
1474       "Nodes (%u/%u) %.3f%% "
1475       "Nexts (%u/%u) %.3f%% "
1476       "Suffixes (%u/%u) %.3f%%\n",
1477       num_keys, num_nodes, max_nodes,
1478       100. * math_util::SafeDivide(num_nodes, max_nodes), num_nexts, max_nexts,
1479       100. * math_util::SafeDivide(num_nexts, max_nexts), suffixes_size,
1480       max_suffixes_size,
1481       100. * math_util::SafeDivide(suffixes_size, max_suffixes_size));
1482 
1483   if (verbosity > 0) {
1484     for (int i = 0; i < kNumNextAllocationBuckets; i++) {
1485       if (num_free[i] > 0) {
1486         IcingStringUtil::SStringAppendF(&ret, 0, "Freelist@%d: %u\n", 1 << i,
1487                                         num_free[i]);
1488       }
1489     }
1490     IcingStringUtil::SStringAppendF(
1491         &ret, 0, "Freelist total: %u/%u %.3f%%\n", total_free, num_nexts,
1492         100. * math_util::SafeDivide(total_free, num_nexts));
1493 
1494     for (int i = 0; i < 256; i++) {
1495       if (child_counts[i] > 0) {
1496         IcingStringUtil::SStringAppendF(&ret, 0, "Child count@%d: %u\n", i + 1,
1497                                         child_counts[i]);
1498       }
1499     }
1500     for (int i = 0; i < kNumNextAllocationBuckets; i++) {
1501       IcingStringUtil::SStringAppendF(&ret, 0, "Wasted@%d: %u\n", 1 << i,
1502                                       wasted[i]);
1503     }
1504     IcingStringUtil::SStringAppendF(
1505         &ret, 0,
1506         "Wasted total: %u\n"
1507         "Num intermediates %u num leaves %u "
1508         "suffixes used %u null %u\n"
1509         "avg and max children for intermediates: %.3f, %u\n"
1510         "avg and max depth for leaves: %.3f, %u\n"
1511         "Total next frag: %.3f%%\n",
1512         total_wasted, num_intermediates, num_leaves, suffixes_used,
1513         null_suffixes, 1. * sum_children / num_intermediates, max_children,
1514         1. * sum_depth / num_leaves, max_depth,
1515         100. * math_util::SafeDivide((total_free + total_wasted), num_nexts));
1516   }
1517   IcingStringUtil::SStringAppendF(
1518       &ret, 0, "Memory usage: %zu/%zu bytes\n",
1519       num_nodes * sizeof(Node) + num_nexts * sizeof(Next) + suffixes_size,
1520       max_nodes * sizeof(Node) + max_nexts * sizeof(Next) + max_suffixes_size);
1521 
1522   IcingStringUtil::SStringAppendF(
1523       &ret, 0, "Dirty pages: nodes %u/%.0f nexts %u/%.0f suffixes %u/%.0f\n",
1524       dirty_pages_nodes,
1525       math_util::SafeDivide(num_nodes * sizeof(Node) + getpagesize() - 1,
1526                             getpagesize()),
1527       dirty_pages_nexts,
1528       math_util::SafeDivide(num_nexts * sizeof(Next) + getpagesize() - 1,
1529                             getpagesize()),
1530       dirty_pages_suffixes,
1531       math_util::SafeDivide(suffixes_size + getpagesize() - 1, getpagesize()));
1532 
1533   return ret;
1534 }
1535 
DumpTrie(std::ostream * pretty_print,std::vector<std::string> * keys) const1536 void IcingDynamicTrie::DumpTrie(std::ostream *pretty_print,
1537                                 std::vector<std::string> *keys) const {
1538   if (!is_initialized()) {
1539     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1540   }
1541 
1542   Dumper dumper(*this);
1543   dumper.Dump(pretty_print, keys);
1544 }
1545 
Clear()1546 void IcingDynamicTrie::Clear() {
1547   if (!is_initialized()) {
1548     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1549   }
1550 
1551   storage_->Clear();
1552   for (auto &bitmap : property_bitmaps_) {
1553     if (bitmap) {
1554       bitmap->Delete();
1555       bitmap.reset();
1556     }
1557   }
1558   deleted_bitmap_->Truncate(0);
1559 }
1560 
ClearSuffixAndValue(uint32_t suffix_value_index)1561 bool IcingDynamicTrie::ClearSuffixAndValue(uint32_t suffix_value_index) {
1562   // The size 1 below is for a '\0' between the suffix and the value.
1563   size_t suffix_and_value_length =
1564       strlen(this->storage_->GetSuffix(suffix_value_index)) + 1 +
1565       this->value_size();
1566   char *mutable_suffix_and_value = this->storage_->GetMutableSuffix(
1567       suffix_value_index, suffix_and_value_length);
1568 
1569   if (mutable_suffix_and_value == nullptr) {
1570     return false;
1571   }
1572 
1573   memset(mutable_suffix_and_value, 0, suffix_and_value_length);
1574   return true;
1575 }
1576 
ResetNext(uint32_t next_index)1577 bool IcingDynamicTrie::ResetNext(uint32_t next_index) {
1578   Next *mutable_next =
1579       this->storage_->GetMutableNextArray(next_index, /*len=*/1);
1580 
1581   if (mutable_next == nullptr) {
1582     return false;
1583   }
1584   ResetMutableNext(*mutable_next);
1585   return true;
1586 }
1587 
SortNextArray(const Node * node)1588 bool IcingDynamicTrie::SortNextArray(const Node *node) {
1589   if (node == nullptr) {
1590     // Nothing to sort, return success directly.
1591     return true;
1592   }
1593 
1594   uint32_t next_array_buffer_size = 1u << node->log2_num_children();
1595   Next *next_array_start = this->storage_->GetMutableNextArray(
1596       node->next_index(), next_array_buffer_size);
1597 
1598   if (next_array_start == nullptr) {
1599     return false;
1600   }
1601 
1602   std::sort(next_array_start, next_array_start + next_array_buffer_size);
1603   return true;
1604 }
1605 
Insert(std::string_view key,const void * value,uint32_t * value_index,bool replace,bool * pnew_key)1606 libtextclassifier3::Status IcingDynamicTrie::Insert(std::string_view key,
1607                                                     const void *value,
1608                                                     uint32_t *value_index,
1609                                                     bool replace,
1610                                                     bool *pnew_key) {
1611   if (!is_initialized()) {
1612     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1613   }
1614 
1615   if (pnew_key) *pnew_key = false;
1616 
1617   if (!IsKeyValid(key)) {
1618     return absl_ports::InvalidArgumentError(
1619         "Key cannot contain a null character '\\0'");
1620   }
1621 
1622   // Find out ahead of time whether things will fit. A conservative
1623   // check based on allocations made below.
1624   //
1625   // IMPORTANT: This needs to be updated if the alloc patterns below
1626   // change.
1627   if (!(storage_->nodes_left() >= 2 + key.size() + 1 &&
1628         storage_->nexts_left() >= 2 + key.size() + 1 + kMaxNextArraySize &&
1629         storage_->suffixes_left() >= key.size() + 1 + value_size())) {
1630     return absl_ports::ResourceExhaustedError("No more space left");
1631   }
1632 
1633   uint32_t best_node_index;
1634   int key_offset;
1635   FindBestNode(key, &best_node_index, &key_offset, false);
1636 
1637   // A negative key_offset indicates that storage_ is empty
1638   if (key_offset < 0) {
1639     // First key.
1640     if (!storage_->empty()) {
1641       ICING_LOG(FATAL) << "Key offset is negative but storage is not empty, "
1642                           "there're inconsistencies in dynamic trie.";
1643     }
1644     Node *node = storage_->AllocNode();
1645     node->set_next_index(storage_->MakeSuffix(key, value, value_index));
1646     node->set_is_leaf(true);
1647     node->set_log2_num_children(0);
1648   } else if (storage_->GetNode(best_node_index)->is_leaf()) {
1649     // Prefix in the trie. Split at leaf.
1650     Node *split_node = storage_->GetMutableNode(best_node_index);
1651     const char *prev_suffix = storage_->GetSuffix(split_node->next_index());
1652 
1653     // Find the common prefix length starting from prev_suffix[0] and
1654     // key[key_offset].
1655     // - prev_suffix terminates with '\0'.
1656     // - key is a std::string_view object, so it may not be null-terminated.
1657     // - key doesn't contain '\0' because it's checked in IsKeyValid() above.
1658     int common_prefix_len = 0;
1659     while (key_offset + common_prefix_len < key.size() &&
1660            prev_suffix[common_prefix_len] ==
1661                key[key_offset + common_prefix_len]) {
1662       ++common_prefix_len;
1663     }
1664 
1665     // Equal strings
1666     bool strings_equal = prev_suffix[common_prefix_len] == '\0' &&
1667                          key_offset + common_prefix_len >= key.size();
1668     if (strings_equal) {
1669       if (value_index) {
1670         *value_index =
1671             storage_->GetSuffixIndex(prev_suffix + common_prefix_len + 1);
1672       }
1673       // Update value if replace == true and return.
1674       if (replace) {
1675         char *mutable_prev_suffix_cur = storage_->GetMutableSuffix(
1676             storage_->GetSuffixIndex(prev_suffix + common_prefix_len + 1),
1677             value_size());
1678         memcpy(mutable_prev_suffix_cur, value, value_size());
1679       }
1680       return libtextclassifier3::Status::OK;
1681     }
1682 
1683     // Create single-branch children for the common prefix
1684     // length. After the loop, split_node points to the node that
1685     // will have more than 1 char.
1686     for (int i = 0; i < common_prefix_len; i++) {
1687       // Create a single-branch child node.
1688       ICING_ASSIGN_OR_RETURN(Next * split_next, storage_->AllocNextArray(1));
1689       split_node->set_next_index(storage_->GetNextArrayIndex(split_next));
1690       split_node->set_is_leaf(false);
1691       split_node->set_log2_num_children(0);
1692       Node *child_node = storage_->AllocNode();
1693       split_next[0].set_val(*(prev_suffix + i));
1694       split_next[0].set_node_index(storage_->GetNodeIndex(child_node));
1695 
1696       split_node = child_node;
1697     }
1698 
1699     // Fill a split.
1700     ICING_ASSIGN_OR_RETURN(Next * split_next, storage_->AllocNextArray(2));
1701     split_node->set_next_index(storage_->GetNextArrayIndex(split_next));
1702     split_node->set_is_leaf(false);
1703     split_node->set_log2_num_children(1);
1704     Node *prev_suffix_node = storage_->AllocNode();
1705     Node *key_node = storage_->AllocNode();
1706     split_next[0].set_val(*(prev_suffix + common_prefix_len));
1707     split_next[0].set_node_index(storage_->GetNodeIndex(prev_suffix_node));
1708     if (*(prev_suffix + common_prefix_len)) {
1709       uint32_t next_index =
1710           storage_->GetSuffixIndex(prev_suffix + common_prefix_len) + 1;
1711       prev_suffix_node->set_next_index(next_index);
1712     } else {
1713       uint32_t next_index =
1714           storage_->GetSuffixIndex(prev_suffix + common_prefix_len);
1715       prev_suffix_node->set_next_index(next_index);
1716     }
1717 
1718     char next_val = GetCharOrNull(key, key_offset + common_prefix_len);
1719     prev_suffix_node->set_is_leaf(true);
1720     prev_suffix_node->set_log2_num_children(0);
1721     split_next[1].set_val(next_val);
1722     split_next[1].set_node_index(storage_->GetNodeIndex(key_node));
1723     if (next_val != '\0') {
1724       uint32_t next_index = storage_->MakeSuffix(
1725           key.substr(key_offset + common_prefix_len + 1), value, value_index);
1726       key_node->set_next_index(next_index);
1727     } else {
1728       uint32_t next_index = storage_->MakeSuffix(
1729           key.substr(key_offset + common_prefix_len), value, value_index);
1730       key_node->set_next_index(next_index);
1731     }
1732     key_node->set_is_leaf(true);
1733     key_node->set_log2_num_children(0);
1734 
1735     std::sort(split_next, split_next + 2);
1736   } else {
1737     // Insert into intermediate node.
1738     const Node *best_node = storage_->GetNode(best_node_index);
1739 
1740     // Add our value as a node + suffix.
1741     Node *new_leaf_node = storage_->AllocNode();
1742     if (key_offset < key.size()) {
1743       uint32_t next_index =
1744           storage_->MakeSuffix(key.substr(key_offset + 1), value, value_index);
1745       new_leaf_node->set_next_index(next_index);
1746     } else {
1747       uint32_t next_index =
1748           storage_->MakeSuffix(key.substr(key_offset), value, value_index);
1749       new_leaf_node->set_next_index(next_index);
1750     }
1751     new_leaf_node->set_is_leaf(true);
1752     new_leaf_node->set_log2_num_children(0);
1753 
1754     // Figure out the real length of the existing next array.
1755     uint32_t next_array_buffer_size = 1u << best_node->log2_num_children();
1756     Next *cur_next = storage_->GetMutableNextArray(best_node->next_index(),
1757                                                    next_array_buffer_size);
1758     int next_len = GetValidNextsSize(cur_next, next_array_buffer_size);
1759     Next *new_next = cur_next;
1760     if (next_len == (next_array_buffer_size)) {
1761       // Allocate a new, larger, array.
1762       ICING_ASSIGN_OR_RETURN(new_next, storage_->AllocNextArray(next_len + 1));
1763       memcpy(new_next, cur_next, sizeof(Next) * next_len);
1764     }
1765 
1766     // Write a link to our new leaf node and sort.
1767     new_next[next_len].set_val(GetCharOrNull(key, key_offset));
1768     new_next[next_len].set_node_index(storage_->GetNodeIndex(new_leaf_node));
1769     std::inplace_merge(new_next, new_next + next_len, new_next + next_len + 1);
1770     next_len++;
1771 
1772     // If this was new, update the parent node and free the old next
1773     // array.
1774     if (new_next != cur_next) {
1775       Node *mutable_best_node =
1776           storage_->GetMutableNode(storage_->GetNodeIndex(best_node));
1777       mutable_best_node->set_next_index(storage_->GetNextArrayIndex(new_next));
1778       mutable_best_node->set_is_leaf(false);
1779       uint8_t log2_num_children = mutable_best_node->log2_num_children();
1780 
1781       // 8 == log2(256)
1782       if (log2_num_children >= 8) {
1783         return absl_ports::InternalError(
1784             "Number of children exceeds the max allowed size");
1785       }
1786 
1787       mutable_best_node->set_log2_num_children(log2_num_children + 1);
1788 
1789       storage_->FreeNextArray(cur_next,
1790                               mutable_best_node->log2_num_children() - 1);
1791     }
1792   }
1793 
1794   // We added a new key.
1795   storage_->inc_num_keys();
1796 
1797   if (pnew_key) *pnew_key = true;
1798   return libtextclassifier3::Status::OK;
1799 }
1800 
GetValueAtIndex(uint32_t value_index) const1801 const void *IcingDynamicTrie::GetValueAtIndex(uint32_t value_index) const {
1802   if (!is_initialized()) {
1803     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1804   }
1805 
1806   return static_cast<const void *>(storage_->GetSuffix(value_index));
1807 }
1808 
SetValueAtIndex(uint32_t value_index,const void * value)1809 void IcingDynamicTrie::SetValueAtIndex(uint32_t value_index,
1810                                        const void *value) {
1811   if (!is_initialized()) {
1812     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1813   }
1814 
1815   if (value_index > storage_->hdr().max_suffixes_size() - value_size()) {
1816     ICING_LOG(FATAL) << "Value index is out of range";
1817   }
1818 
1819   memcpy(storage_->GetMutableSuffix(value_index, value_size()), value,
1820          value_size());
1821 }
1822 
Find(std::string_view key,void * value,uint32_t * value_index) const1823 bool IcingDynamicTrie::Find(std::string_view key, void *value,
1824                             uint32_t *value_index) const {
1825   if (!is_initialized()) {
1826     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1827   }
1828 
1829   if (!IsKeyValid(key)) {
1830     return false;
1831   }
1832 
1833   uint32_t best_node_index;
1834   int key_offset;
1835   FindBestNode(key, &best_node_index, &key_offset, false);
1836 
1837   if (key_offset < 0) {
1838     return false;
1839   }
1840 
1841   const Node *best_node = storage_->GetNode(best_node_index);
1842   if (!best_node->is_leaf()) {
1843     return false;
1844   }
1845 
1846   std::string_view suffix(storage_->GetSuffix(best_node->next_index()));
1847   if (key.substr(key_offset) != suffix) {
1848     return false;
1849   }
1850 
1851   uint32_t vidx = best_node->next_index() + suffix.size() + 1;
1852   if (value_index) *value_index = vidx;
1853   if (value) memcpy(value, storage_->GetSuffix(vidx), value_size());
1854   return true;
1855 }
1856 
Iterator(const IcingDynamicTrie & trie,std::string prefix,bool reverse)1857 IcingDynamicTrie::Iterator::Iterator(const IcingDynamicTrie &trie,
1858                                      std::string prefix, bool reverse)
1859     : cur_key_(std::move(prefix)),
1860       cur_suffix_(nullptr),
1861       cur_suffix_len_(0),
1862       single_leaf_match_(false),
1863       reverse_(reverse),
1864       trie_(trie) {
1865   if (!trie.is_initialized()) {
1866     ICING_LOG(FATAL) << "DynamicTrie not initialized";
1867   }
1868 
1869   Reset();
1870 }
1871 
BranchToLeaf(uint32_t node_index,BranchType branch_type)1872 void IcingDynamicTrie::Iterator::BranchToLeaf(uint32_t node_index,
1873                                               BranchType branch_type) {
1874   // Go down the trie, following the left-most child until we hit a
1875   // leaf. Push to stack and cur_key nodes and chars as we go.
1876   // When reverse_ is true, the method will follow the right-most child.
1877   const Node *node = trie_.storage_->GetNode(node_index);
1878   while (!node->is_leaf()) {
1879     const Next *next_start = trie_.storage_->GetNext(node->next_index(), 0);
1880     int child_idx;
1881     if (branch_type == BranchType::kRightMost) {
1882       uint32_t next_array_size = 1u << node->log2_num_children();
1883       child_idx = trie_.GetValidNextsSize(next_start, next_array_size) - 1;
1884     } else {
1885       // node isn't a leaf. So it must have >0 children.
1886       // 0 is the left-most child.
1887       child_idx = 0;
1888     }
1889     const Next &child_next = next_start[child_idx];
1890     branch_stack_.push_back(Branch(node_index, child_idx));
1891     cur_key_.push_back(child_next.val());
1892 
1893     node_index = child_next.node_index();
1894     node = trie_.storage_->GetNode(node_index);
1895   }
1896 
1897   // We're at a leaf.
1898   cur_suffix_ = trie_.storage_->GetSuffix(
1899       trie_.storage_->GetNode(node_index)->next_index());
1900   cur_suffix_len_ = strlen(cur_suffix_);
1901   cur_key_.append(cur_suffix_, cur_suffix_len_);
1902 }
1903 
Reset()1904 void IcingDynamicTrie::Iterator::Reset() {
1905   if (!IsKeyValid(cur_key_)) {
1906     // Set invalid and return.
1907     cur_suffix_ = nullptr;
1908     cur_suffix_len_ = 0;
1909     return;
1910   }
1911 
1912   size_t strip_len = branch_stack_.size() + cur_suffix_len_;
1913 
1914   if (cur_key_.size() < strip_len) {
1915     ICING_LOG(FATAL) << "Key size < visited trie depth + remaining suffix "
1916                         "size, there're inconsistencies in dynamic trie";
1917   }
1918 
1919   // Trim back cur_key_ to original prefix.
1920   cur_key_.resize(cur_key_.size() - strip_len);
1921   cur_suffix_ = nullptr;
1922   cur_suffix_len_ = 0;
1923   single_leaf_match_ = false;
1924   branch_stack_.clear();
1925 
1926   // Nothing to do with an empty trie.
1927   if (trie_.storage_->empty()) return;
1928 
1929   // Find node matching prefix.
1930   uint32_t node_index;
1931   int key_offset;
1932   trie_.FindBestNode(cur_key_, &node_index, &key_offset, true);
1933 
1934   // Two cases/states:
1935   //
1936   // - Found an intermediate node. If we matched all of prefix
1937   //   (cur_key_), BranchToLeaf.
1938   //
1939   // - Found a leaf node, which is the ONLY matching key for this
1940   //   prefix. Check that suffix matches the prefix. Then we set
1941   //   single_leaf_match_ = true and apply different logic for
1942   //   Advance.
1943   if (key_offset < 0) {
1944     // A negative key_offset indicates that trie_.storage_ is empty
1945     ICING_LOG(FATAL) << "Trie storage is empty";
1946   }
1947 
1948   const Node *best_node = trie_.storage_->GetNode(node_index);
1949   if (best_node->is_leaf() &&
1950       !strncmp(cur_key_.c_str() + key_offset,
1951                trie_.storage_->GetSuffix(best_node->next_index()),
1952                cur_key_.size() - key_offset)) {
1953     // Copy the entire suffix into the current key.
1954     cur_key_.resize(key_offset);
1955     cur_key_.append(trie_.storage_->GetSuffix(best_node->next_index()));
1956     cur_suffix_ = trie_.storage_->GetSuffix(best_node->next_index());
1957     cur_suffix_len_ = strlen(cur_suffix_);
1958     single_leaf_match_ = true;
1959   } else if (static_cast<size_t>(key_offset) == cur_key_.size()) {
1960     BranchType branch_type =
1961         (reverse_) ? BranchType::kRightMost : BranchType::kLeftMost;
1962     BranchToLeaf(node_index, branch_type);
1963   }
1964 }
1965 
Advance()1966 bool IcingDynamicTrie::Iterator::Advance() {
1967   if (!IsValid()) return false;
1968   if (single_leaf_match_) {
1969     // If we only have an exact match, the Advance logic does not
1970     // apply. Invalidate the iterator and return.
1971     cur_suffix_ = nullptr;
1972     cur_suffix_len_ = 0;
1973     return false;
1974   }
1975 
1976   if (cur_key_.size() < (branch_stack_.size() + cur_suffix_len_)) {
1977     ICING_LOG(FATAL) << "Key size < visited trie depth + remaining suffix "
1978                         "size, there're inconsistencies in dynamic trie";
1979   }
1980 
1981   // Move up from the current leaf.
1982   cur_key_.resize(cur_key_.size() - cur_suffix_len_);
1983   cur_suffix_ = nullptr;
1984   cur_suffix_len_ = 0;
1985 
1986   while (!branch_stack_.empty()) {
1987     Branch *branch = &branch_stack_.back();
1988     const Node *node = trie_.storage_->GetNode(branch->node_idx);
1989     if (reverse_) {
1990       branch->child_idx--;
1991     } else {
1992       branch->child_idx++;
1993     }
1994     if (branch->child_idx >= 0 &&
1995         branch->child_idx < (1 << node->log2_num_children())) {
1996       const Next *child_next =
1997           trie_.storage_->GetNext(node->next_index(), branch->child_idx);
1998       if (child_next->node_index() != kInvalidNodeIndex) {
1999         // Successfully incremented to the next child. Update the char
2000         // value at this depth.
2001         cur_key_[cur_key_.size() - 1] = child_next->val();
2002         // We successfully found a sub-trie to explore.
2003         BranchType branch_type =
2004             (reverse_) ? BranchType::kRightMost : BranchType::kLeftMost;
2005         BranchToLeaf(child_next->node_index(), branch_type);
2006         return true;
2007       }
2008     }
2009     branch_stack_.pop_back();
2010     cur_key_.resize(cur_key_.size() - 1);
2011   }
2012 
2013   // Un-wound the entire stack. We are done.
2014   return false;
2015 }
2016 
IsValid() const2017 bool IcingDynamicTrie::Iterator::IsValid() const {
2018   return cur_suffix_ != nullptr;
2019 }
2020 
GetKey() const2021 std::string_view IcingDynamicTrie::Iterator::GetKey() const {
2022   // cur_key_ can have a NULL in it so cur_key_ can be wrong but
2023   // cur_key_.c_str() is always right.
2024   return IsValid() ? cur_key_.c_str() : std::string_view();
2025 }
2026 
GetValue() const2027 const void *IcingDynamicTrie::Iterator::GetValue() const {
2028   if (!IsValid()) return nullptr;
2029 
2030   return static_cast<const void *>(cur_suffix_ + cur_suffix_len_ + 1);
2031 }
2032 
GetValueIndex() const2033 uint32_t IcingDynamicTrie::Iterator::GetValueIndex() const {
2034   if (!IsValid()) return kInvalidSuffixIndex;
2035 
2036   return trie_.storage_->GetSuffixIndex(cur_suffix_ + cur_suffix_len_ + 1);
2037 }
2038 
LeftBranchToUtf8End()2039 void IcingDynamicTrie::Utf8Iterator::LeftBranchToUtf8End() {
2040   if (cur_len_ <= 0) {
2041     ICING_LOG(FATAL) << "Invalid UTF-8 character length";
2042   }
2043 
2044   if (branch_end_ - branch_stack_ != cur_len_) {
2045     ICING_LOG(FATAL) << "Depth from first visited node to last visited node "
2046                         "doesn't match the current UTF-8 character length";
2047   }
2048 
2049   // Use branch at top of stack to determine where to follow.
2050   const Branch &branch = *(branch_end_ - 1);
2051   const Node *node = trie_.storage_->GetNode(branch.child->node_index());
2052 
2053   // If we start with non-ascii, take all left branches while there is
2054   // a continuation byte.
2055   if (!i18n_utils::IsAscii(cur_[cur_len_ - 1])) {
2056     while (!node->is_leaf()) {
2057       if (cur_len_ >= U8_MAX_LENGTH) break;
2058 
2059       InitBranch(branch_end_, node, 0);
2060       // When we are looking to complete a utf8 char, skip 0s.
2061       if (branch_end_->child->val() == 0) {
2062         // Check if we already have a valid cur_.
2063         cur_[cur_len_] = 0;
2064         UChar32 uchar32 = i18n_utils::GetUChar32At(cur_, cur_len_, 0);
2065         if (uchar32 == i18n_utils::kInvalidUChar32 &&
2066             node->log2_num_children() > 0) {
2067           branch_end_->child++;
2068         } else {
2069           // Good termination. Just break.
2070           break;
2071         }
2072       }
2073 
2074       if (!IcingStringUtil::IsContinuationByte(branch_end_->child->val()))
2075         break;
2076 
2077       cur_[cur_len_++] = branch_end_->child->val();
2078       node = trie_.storage_->GetNode(branch_end_->child->node_index());
2079       branch_end_++;
2080     }
2081 
2082     cur_logical_node_.node = node;
2083 
2084     // Maybe go into suffixes and set suffix_offset.
2085     if (node->is_leaf()) {
2086       GoIntoSuffix(node);
2087     } else {
2088       cur_logical_node_.suffix_offset = 0;
2089     }
2090   } else {  // ascii
2091     cur_logical_node_.node = node;
2092     cur_logical_node_.suffix_offset = 0;
2093   }
2094 
2095   // NULL-terminate.
2096   cur_[cur_len_] = 0;
2097 }
2098 
GoIntoSuffix(const Node * node)2099 void IcingDynamicTrie::Utf8Iterator::GoIntoSuffix(const Node *node) {
2100   const char *suffix = trie_.storage_->GetSuffix(node->next_index());
2101   const char *cur_suffix;
2102   for (cur_suffix = suffix; cur_len_ < U8_MAX_LENGTH &&
2103                             IcingStringUtil::IsContinuationByte(*cur_suffix);
2104        cur_suffix++) {
2105     cur_[cur_len_++] = *cur_suffix;
2106   }
2107   cur_logical_node_.suffix_offset = cur_suffix - suffix;
2108 }
2109 
Reset()2110 void IcingDynamicTrie::Utf8Iterator::Reset() {
2111   cur_[0] = 0;
2112   cur_len_ = 0;
2113   branch_end_ = branch_stack_;
2114 
2115   if (start_node_) {
2116     // Take the first char node's children.
2117     const Next *next = trie_.storage_->GetNext(start_node_->next_index(), 0);
2118     branch_end_->node = start_node_;
2119     branch_end_->child_end = next + (1 << start_node_->log2_num_children());
2120     if (next->val() == 0) {
2121       // Skip any nulls at this position. We don't return empty string
2122       // as an iteration.
2123       next++;
2124     }
2125     branch_end_->child = next;
2126     cur_[cur_len_++] = next->val();
2127     branch_end_++;
2128 
2129     // Will NULL-terminate cur_.
2130     LeftBranchToUtf8End();
2131   } else {
2132     // Nothing to return.
2133     cur_logical_node_.node = nullptr;
2134     cur_logical_node_.suffix_offset = 0;
2135   }
2136 }
2137 
Advance()2138 bool IcingDynamicTrie::Utf8Iterator::Advance() {
2139   if (!IsValid()) return false;
2140 
2141   // Clip to branch.
2142   cur_len_ = branch_end_ - branch_stack_;
2143 
2144   while (branch_end_ > branch_stack_) {
2145     Branch *branch = branch_end_ - 1;
2146     branch->child++;
2147     if (!branch->IsFinished()) {
2148       // Successfully incremented to the next child. Update the char
2149       // value at this depth.
2150       cur_[cur_len_ - 1] = branch->child->val();
2151 
2152       // We successfully found a sub-trie to explore.
2153       LeftBranchToUtf8End();
2154       return true;
2155     }
2156     cur_len_--;
2157     branch_end_--;
2158   }
2159 
2160   // Un-wound the entire stack. We are done.
2161   return false;
2162 }
2163 
InitBranch(Branch * branch,const Node * start,char key_char)2164 void IcingDynamicTrie::Utf8Iterator::InitBranch(Branch *branch,
2165                                                 const Node *start,
2166                                                 char key_char) {
2167   branch->node = start;
2168   branch->child = trie_.storage_->GetNext(start->next_index(), 0);
2169   branch->child_end = branch->child + (1 << start->log2_num_children());
2170   if (key_char) {
2171     branch->child =
2172         trie_.LowerBound(branch->child, branch->child_end, key_char);
2173   }
2174 }
2175 
IsFinished()2176 bool IcingDynamicTrie::Utf8Iterator::Branch::IsFinished() {
2177   return child >= child_end || child->node_index() == kInvalidNodeIndex;
2178 }
2179 
IsValid() const2180 bool IcingDynamicTrie::Utf8Iterator::IsValid() const { return cur_len_ > 0; }
2181 
GetNextByChar(const Node * node,uint8_t key_char) const2182 const IcingDynamicTrie::Next *IcingDynamicTrie::GetNextByChar(
2183     const Node *node, uint8_t key_char) const {
2184   const Next *next_start = storage_->GetNext(node->next_index(), 0);
2185   const Next *next_end = next_start + (1 << node->log2_num_children());
2186 
2187   const Next *found = LowerBound(next_start, next_end, key_char);
2188   if (found >= next_end || found->val() != key_char ||
2189       found->node_index() == kInvalidNodeIndex) {
2190     return nullptr;
2191   }
2192 
2193   return found;
2194 }
2195 
GetValidNextsSize(const IcingDynamicTrie::Next * next_array_start,int next_array_length) const2196 int IcingDynamicTrie::GetValidNextsSize(
2197     const IcingDynamicTrie::Next *next_array_start,
2198     int next_array_length) const {
2199   // Only searching for key char 0xff is not sufficient, as 0xff can be a valid
2200   // character. We must also specify kInvalidNodeIndex as the target node index
2201   // when searching the next array.
2202   return LowerBound(next_array_start, next_array_start + next_array_length,
2203                     /*key_char=*/0xff, /*node_index=*/kInvalidNodeIndex) -
2204          next_array_start;
2205 }
2206 
LowerBound(const Next * start,const Next * end,uint8_t key_char,uint32_t node_index) const2207 const IcingDynamicTrie::Next *IcingDynamicTrie::LowerBound(
2208     const Next *start, const Next *end, uint8_t key_char,
2209     uint32_t node_index) const {
2210   // Above this value will use binary search instead of linear
2211   // search. 16 was chosen from running some benchmarks with
2212   // different values.
2213   static const uint32_t kBinarySearchCutoff = 16;
2214 
2215   Next key_next(key_char, node_index);
2216   if (end - start >= kBinarySearchCutoff) {
2217     // Binary search.
2218     return std::lower_bound(start, end, key_next);
2219   } else {
2220     // Linear search.
2221     const Next *found;
2222     for (found = start; found < end; found++) {
2223       if (!(*found < key_next)) {
2224         // Should have gotten match.
2225         break;
2226       }
2227     }
2228     return found;
2229   }
2230 }
2231 
FindBestNode(std::string_view key,uint32_t * best_node_index,int * key_offset,bool prefix,bool utf8) const2232 void IcingDynamicTrie::FindBestNode(std::string_view key,
2233                                     uint32_t *best_node_index, int *key_offset,
2234                                     bool prefix, bool utf8) const {
2235   // Find the best node such that:
2236   //
2237   // - If key is NOT in the trie, key[0..key_offset) is a prefix to
2238   //   everything under best_node_index.
2239   //
2240   // - If key is in the trie, best_node_index is the leaf that points
2241   //   to the key suffix and key_offset == strlen(key).
2242   //
2243   // If prefix is true, when key is both in the trie AND a prefix
2244   // (e.g. "ab" and "abc" are in the trie), we return the intermediate
2245   // node with key as the prefix as opposed to the exactly matching
2246   // leaf node.
2247   if (storage_->empty()) {
2248     *best_node_index = 0;
2249     *key_offset = -1;
2250     return;
2251   }
2252 
2253   const Node *cur_node = storage_->GetRootNode();
2254   int cur_key_idx = 0;
2255   int utf8_key_idx = 0;
2256   const Node *utf8_node = cur_node;
2257   while (!cur_node->is_leaf()) {
2258     char cur_char = GetCharOrNull(key, cur_key_idx);
2259     const Next *found = GetNextByChar(cur_node, cur_char);
2260     if (!found) break;
2261 
2262     if (prefix && found->val() == 0) {
2263       break;
2264     }
2265 
2266     cur_node = storage_->GetNode(found->node_index());
2267 
2268     // End of key.
2269     if (cur_key_idx >= key.size()) {
2270       break;
2271     }
2272 
2273     ++cur_key_idx;
2274     cur_char = GetCharOrNull(key, cur_key_idx);
2275 
2276     if (utf8 && i18n_utils::IsLeadUtf8Byte(cur_char)) {
2277       utf8_node = cur_node;
2278       utf8_key_idx = cur_key_idx;
2279     }
2280   }
2281 
2282   if (utf8) {
2283     // Rewind.
2284     cur_node = utf8_node;
2285     cur_key_idx = utf8_key_idx;
2286   }
2287 
2288   *best_node_index = storage_->GetNodeIndex(cur_node);
2289   *key_offset = cur_key_idx;
2290 }
2291 
FindNewBranchingPrefixLength(std::string_view key,bool utf8) const2292 int IcingDynamicTrie::FindNewBranchingPrefixLength(std::string_view key,
2293                                                    bool utf8) const {
2294   if (!IsKeyValid(key)) {
2295     return kNoBranchFound;
2296   }
2297 
2298   if (storage_->empty()) {
2299     return kNoBranchFound;
2300   }
2301 
2302   uint32_t best_node_index;
2303   int key_offset;
2304   FindBestNode(key, &best_node_index, &key_offset, /*prefix=*/true, utf8);
2305   if (key_offset < 0) {
2306     return kNoBranchFound;
2307   }
2308 
2309   const Node *cur_node = storage_->GetNode(best_node_index);
2310   if (cur_node->is_leaf()) {
2311     // Prefix in the trie. Split at leaf.
2312     const char *prev_suffix = storage_->GetSuffix(cur_node->next_index());
2313     int additional_branch_prefix_len = 0;
2314     // Find the additional prefix length starting from prev_suffix[0] and
2315     // key[key_offset].
2316     // - prev_suffix terminates with '\0'.
2317     // - key is a std::string_view object, so it may not be null-terminated.
2318     // - key doesn't contain '\0' because it's checked in IsKeyValid() above.
2319     while (key_offset + additional_branch_prefix_len < key.size() &&
2320            prev_suffix[additional_branch_prefix_len] ==
2321                key[key_offset + additional_branch_prefix_len]) {
2322       ++additional_branch_prefix_len;
2323     }
2324 
2325     // Equal strings. No branching.
2326     bool strings_equal =
2327         prev_suffix[additional_branch_prefix_len] == '\0' &&
2328         key_offset + additional_branch_prefix_len >= key.size();
2329     if (strings_equal) {
2330       return kNoBranchFound;
2331     }
2332 
2333     // The remaining key (after key_offset) is a prefix of the suffix, so the
2334     // branching prefix length is key length.
2335     if (key_offset + additional_branch_prefix_len >= key.size()) {
2336       return key.size();
2337     }
2338 
2339     if (utf8) {
2340       // Rewind to utf8 boundary.
2341       return i18n_utils::SafeTruncateUtf8Length(
2342           key.data(), key_offset + additional_branch_prefix_len);
2343     }
2344 
2345     return key_offset + additional_branch_prefix_len;
2346   } else if (cur_node->log2_num_children() == 0) {
2347     // Intermediate node going from no branching to branching.
2348     return key_offset;
2349   }
2350 
2351   // If we've reached this point, then we're already at a branch point. So there
2352   // is no *new* branch point.
2353   return kNoBranchFound;
2354 }
2355 
FindBranchingPrefixLengths(std::string_view key,bool utf8) const2356 std::vector<int> IcingDynamicTrie::FindBranchingPrefixLengths(
2357     std::string_view key, bool utf8) const {
2358   std::vector<int> prefix_lengths;
2359 
2360   if (!IsKeyValid(key)) {
2361     return prefix_lengths;
2362   }
2363 
2364   if (storage_->empty()) {
2365     return prefix_lengths;
2366   }
2367 
2368   const Node *cur_node = storage_->GetRootNode();
2369   int idx = 0;
2370   while (idx < key.size() && !cur_node->is_leaf()) {
2371     // Branching prefix?
2372     if (cur_node->log2_num_children() > 0) {
2373       int len = idx;
2374       if (utf8) {
2375         // Do not cut mid-utf8. Walk up to utf8 boundary.
2376         len = i18n_utils::SafeTruncateUtf8Length(key.data(), len);
2377         if (prefix_lengths.empty() || len != prefix_lengths.back()) {
2378           prefix_lengths.push_back(len);
2379         }
2380       } else {
2381         prefix_lengths.push_back(len);
2382       }
2383     }
2384 
2385     // Move to next.
2386     const Next *found = GetNextByChar(cur_node, key[idx]);
2387     if (found == nullptr) {
2388       break;
2389     }
2390     cur_node = storage_->GetNode(found->node_index());
2391 
2392     ++idx;
2393   }
2394   return prefix_lengths;
2395 }
2396 
IsBranchingTerm(std::string_view key) const2397 bool IcingDynamicTrie::IsBranchingTerm(std::string_view key) const {
2398   if (!is_initialized()) {
2399     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2400   }
2401 
2402   if (!IsKeyValid(key)) {
2403     return false;
2404   }
2405 
2406   if (storage_->empty()) {
2407     return false;
2408   }
2409 
2410   uint32_t best_node_index;
2411   int key_offset;
2412   FindBestNode(key, &best_node_index, &key_offset, /*prefix=*/true);
2413   if (key_offset < 0) {
2414     return false;
2415   }
2416 
2417   const Node *cur_node = storage_->GetNode(best_node_index);
2418 
2419   if (cur_node->is_leaf()) {
2420     return false;
2421   }
2422 
2423   // There is no intermediate node for key in the trie.
2424   if (key_offset < key.size()) {
2425     return false;
2426   }
2427 
2428   // Found key as an intermediate node, but key is not a valid term stored in
2429   // the trie. In this case, we need at least two children for key to be a
2430   // branching term.
2431   if (GetNextByChar(cur_node, '\0') == nullptr) {
2432     return cur_node->log2_num_children() >= 1;
2433   }
2434 
2435   // The intermediate node for key must have more than two children for key to
2436   // be a branching term, one of which represents the leaf node for key itself.
2437   return cur_node->log2_num_children() > 1;
2438 }
2439 
GetDebugInfo(int verbosity,std::string * out) const2440 void IcingDynamicTrie::GetDebugInfo(int verbosity, std::string *out) const {
2441   Stats stats;
2442   CollectStats(&stats);
2443   out->append(stats.DumpStats(verbosity));
2444 
2445   // Property files.
2446   std::vector<std::string> files;
2447   if (!filesystem_->GetMatchingFiles((property_bitmaps_prefix_ + "*").c_str(),
2448                                      &files)) {
2449     ICING_LOG(ERROR) << "Could not get files at prefix "
2450                      << property_bitmaps_prefix_;
2451     return;
2452   }
2453   for (size_t i = 0; i < files.size(); i++) {
2454     IcingStringUtil::SStringAppendF(
2455         out, 1000, "Prop file %s size %" PRIu64 "\n",
2456         filesystem_->GetBasename(files[i].c_str()).c_str(),
2457         filesystem_->GetFileSize(files[i].c_str()));
2458   }
2459   IcingStringUtil::SStringAppendF(
2460       out, 1000, "Deleted file %s size %" PRIu64 "\n",
2461       filesystem_->GetBasename(deleted_bitmap_filename_.c_str()).c_str(),
2462       filesystem_->GetFileSize(deleted_bitmap_filename_.c_str()));
2463 }
2464 
min_free_fraction() const2465 double IcingDynamicTrie::min_free_fraction() const {
2466   if (!is_initialized()) {
2467     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2468   }
2469 
2470   return 1.0 -
2471          std::max(std::max(static_cast<double>(storage_->hdr().num_nodes()) /
2472                                storage_->hdr().max_nodes(),
2473                            static_cast<double>(storage_->hdr().num_nexts()) /
2474                                storage_->hdr().max_nexts()),
2475                   static_cast<double>(storage_->hdr().suffixes_size()) /
2476                       storage_->hdr().max_suffixes_size());
2477 }
2478 
value_size() const2479 uint32_t IcingDynamicTrie::value_size() const {
2480   return storage_->hdr().value_size();
2481 }
2482 
max_value_index() const2483 uint32_t IcingDynamicTrie::max_value_index() const {
2484   return storage_->hdr().max_suffixes_size();
2485 }
2486 
UpdateCrc()2487 Crc32 IcingDynamicTrie::UpdateCrc() {
2488   if (!is_initialized()) {
2489     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2490   }
2491 
2492   if (runtime_options_.storage_policy != RuntimeOptions::kMapSharedWithCrc) {
2493     return Crc32();
2494   }
2495 
2496   // Combine storage crc with property bitmap crcs.
2497   Crc32 crc = storage_->UpdateCrc();
2498 
2499   // Update crcs on bitmaps.
2500   for (size_t i = 0; i < property_bitmaps_.size(); ++i) {
2501     if (property_bitmaps_[i]) {
2502       // Combine property id with the bitmap crc.
2503       uint64_t property_crc = property_bitmaps_[i]->UpdateCrc().Get();
2504       property_crc = (property_crc << 32) | i;
2505       std::string_view property_crc_str(
2506           reinterpret_cast<const char *>(&property_crc), sizeof(property_crc));
2507       crc.Append(property_crc_str);
2508     }
2509   }
2510   uint32_t deleted_crc = deleted_bitmap_->UpdateCrc().Get();
2511   std::string_view deleted_crc_str(reinterpret_cast<const char *>(&deleted_crc),
2512                                    sizeof(deleted_crc));
2513   crc.Append(deleted_crc_str);
2514   return crc;
2515 }
2516 
GetCrc() const2517 Crc32 IcingDynamicTrie::GetCrc() const {
2518   if (!is_initialized()) {
2519     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2520   }
2521 
2522   if (runtime_options_.storage_policy != RuntimeOptions::kMapSharedWithCrc) {
2523     return Crc32();
2524   }
2525 
2526   // Combine storage crc with property bitmap crcs.
2527   Crc32 crc = storage_->GetCrc();
2528 
2529   // Update crcs on bitmaps.
2530   for (size_t i = 0; i < property_bitmaps_.size(); ++i) {
2531     if (property_bitmaps_[i]) {
2532       // Combine property id with the bitmap crc.
2533       uint64_t property_crc = property_bitmaps_[i]->GetCrc().Get();
2534       property_crc = (property_crc << 32) | i;
2535       std::string_view property_crc_str(
2536           reinterpret_cast<const char *>(&property_crc), sizeof(property_crc));
2537       crc.Append(property_crc_str);
2538     }
2539   }
2540   uint32_t deleted_crc = deleted_bitmap_->UpdateCrc().Get();
2541   std::string_view deleted_crc_str(reinterpret_cast<const char *>(&deleted_crc),
2542                                    sizeof(deleted_crc));
2543   crc.Append(deleted_crc_str);
2544   return crc;
2545 }
2546 
OpenOrCreatePropertyBitmap(uint32_t property_id)2547 IcingFlashBitmap *IcingDynamicTrie::OpenOrCreatePropertyBitmap(
2548     uint32_t property_id) {
2549   if (!is_initialized()) {
2550     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2551   }
2552 
2553   if (property_id > kMaxPropertyId) {
2554     ICING_LOG(ERROR) << "Property id " << property_id << " out of range";
2555     return nullptr;
2556   }
2557 
2558   if (property_id >= property_bitmaps_.size()) {
2559     property_bitmaps_.resize(property_id + 1);
2560   }
2561   if (!property_bitmaps_[property_id]) {
2562     std::string filename;
2563     IcingStringUtil::SStringAppendF(
2564         &filename, property_bitmaps_prefix_.size() + 10, "%s%u",
2565         property_bitmaps_prefix_.c_str(), property_id);
2566     property_bitmaps_[property_id] =
2567         OpenAndInitBitmap(filename, false, filesystem_);
2568   }
2569   return property_bitmaps_[property_id].get();
2570 }
2571 
SetProperty(uint32_t value_index,uint32_t property_id)2572 bool IcingDynamicTrie::SetProperty(uint32_t value_index, uint32_t property_id) {
2573   IcingFlashBitmap *bitmap = OpenOrCreatePropertyBitmap(property_id);
2574   if (!bitmap) {
2575     return false;
2576   }
2577   uint64_t idx = ValueIndexToPropertyBitmapIndex(value_index);
2578 
2579   // Also clear deleted bit.
2580   return bitmap->SetBit(idx, true) && deleted_bitmap_->SetBit(idx, false);
2581 }
2582 
ClearProperty(uint32_t value_index,uint32_t property_id)2583 bool IcingDynamicTrie::ClearProperty(uint32_t value_index,
2584                                      uint32_t property_id) {
2585   if (property_id >= property_bitmaps_.size() ||
2586       !property_bitmaps_[property_id]) {
2587     // No bitmap is ok for clearing.
2588     return true;
2589   }
2590 
2591   uint64_t idx = ValueIndexToPropertyBitmapIndex(value_index);
2592   return property_bitmaps_[property_id]->SetBit(idx, false);
2593 }
2594 
SetDeleted(uint32_t value_index)2595 bool IcingDynamicTrie::SetDeleted(uint32_t value_index) {
2596   uint64_t idx = ValueIndexToPropertyBitmapIndex(value_index);
2597   return deleted_bitmap_->SetBit(idx, true);
2598 }
2599 
ClearDeleted(uint32_t value_index)2600 bool IcingDynamicTrie::ClearDeleted(uint32_t value_index) {
2601   uint64_t idx = ValueIndexToPropertyBitmapIndex(value_index);
2602   return deleted_bitmap_->SetBit(idx, false);
2603 }
2604 
2605 // Steps:
2606 // 1. Find the key in the trie.
2607 // 2. Remove the suffix and the value.
2608 // 3. Reset the nexts that point to the nodes to be removed.
2609 // 4. Sort any next array if needed.
2610 // 5. Reset the trie state if the trie is empty after deletion.
2611 //    - This is essential for storage_->empty(), which is a critical check for
2612 //      all trie APIs before accessing the root node via
2613 //      storage_->GetRootNode().
2614 //    - When the trie is empty, it is possible that the root node (i.e.
2615 //      Node(0)):
2616 //      - Contains an invalid next_index(), and accessing it will cause a crash
2617 //        or fetch incorrect data.
2618 //      - Points to a valid next array but the next elements in the array
2619 //        contain kInvalidNodeIndex. Accessing the next node via the next
2620 //        element will cause a crash or fetch incorrect data.
2621 //    - So we must reset the trie state to make sure storage_->empty() works
2622 //      correctly and prevents trie APIs from accessing the root node.
Delete(std::string_view key)2623 bool IcingDynamicTrie::Delete(std::string_view key) {
2624   if (!is_initialized()) {
2625     ICING_LOG(ERROR) << "DynamicTrie not initialized";
2626     return false;
2627   }
2628 
2629   if (!IsKeyValid(key)) {
2630     return false;
2631   }
2632 
2633   if (storage_->empty()) {
2634     // Nothing to delete.
2635     return true;
2636   }
2637 
2638   // Tries to find the key in the trie, starting from the root.
2639   const Node *current_node = storage_->GetRootNode();
2640 
2641   // The node after which we start to remove data.
2642   const Node *last_multichild_node = nullptr;
2643 
2644   // While visiting the trie nodes, we store the indices of Nexts that point
2645   // to all the nodes after last_multichild_node. Those nodes must be
2646   // consecutive and all have only one child. Resetting those Nexts means that
2647   // we remove the data of the key.
2648   std::vector<uint32_t> nexts_to_reset;
2649   nexts_to_reset.reserve(key.length());
2650 
2651   // Iterates through chars in the key, finds nodes in the trie until a leaf
2652   // node is reached. The max number of loops is key.length() + 1 because we
2653   // start from the root.
2654   for (size_t i = 0; i <= key.length(); ++i) {
2655     if (current_node->is_leaf()) {
2656       // Leaf node, now check the suffix.
2657       if (key.substr(i) != storage_->GetSuffix(current_node->next_index())) {
2658         // Key does not exist in the trie, nothing to delete.
2659         return true;
2660       }
2661       // Otherwise, key is found.
2662       break;
2663     }
2664 
2665     // Finds the next char.
2666     const Next *next;
2667     if (i == key.length()) {
2668       // When we're at the end of the key, the next char is the termination char
2669       // '\0'.
2670       next = GetNextByChar(current_node, '\0');
2671     } else {
2672       next = GetNextByChar(current_node, key[i]);
2673     }
2674 
2675     if (next == nullptr) {
2676       // Key does not exist in the trie, nothing to delete.
2677       return true;
2678     }
2679 
2680     // Checks the real size of next array.
2681     uint32_t next_array_buffer_size = 1u << current_node->log2_num_children();
2682     Next *next_array_start = storage_->GetMutableNextArray(
2683         current_node->next_index(), next_array_buffer_size);
2684     int valid_next_array_size =
2685         GetValidNextsSize(next_array_start, next_array_buffer_size);
2686     if (valid_next_array_size == 0) {
2687       // Key does not exist in the trie, nothing to delete.
2688       // This shouldn't happen, but we put a sanity check here in case something
2689       // is wrong.
2690       return true;
2691     } else if (valid_next_array_size == 1) {
2692       // Single-child branch will be deleted.
2693       nexts_to_reset.push_back(storage_->GetNextArrayIndex(next));
2694     } else {
2695       // We see a new node with multiple children, all the previously seen nodes
2696       // shouldn't be removed.
2697       last_multichild_node = current_node;
2698       nexts_to_reset.clear();
2699       nexts_to_reset.push_back(storage_->GetNextArrayIndex(next));
2700     }
2701 
2702     // Updates current_node.
2703     current_node = storage_->GetNode(next->node_index());
2704   }
2705   // Now we've found the key in the trie.
2706 
2707   ClearSuffixAndValue(current_node->next_index());
2708 
2709   // Resets nexts to remove key information.
2710   for (uint32_t next_index : nexts_to_reset) {
2711     ResetNext(next_index);
2712   }
2713 
2714   if (last_multichild_node != nullptr) {
2715     SortNextArray(last_multichild_node);
2716     uint32_t next_array_buffer_size =
2717         1u << last_multichild_node->log2_num_children();
2718     Next *next_array_start = this->storage_->GetMutableNextArray(
2719         last_multichild_node->next_index(), next_array_buffer_size);
2720     uint32_t num_children =
2721         GetValidNextsSize(next_array_start, next_array_buffer_size);
2722     // Shrink the next array if we can.
2723     if (num_children == next_array_buffer_size / 2) {
2724       Node *mutable_node = storage_->GetMutableNode(
2725           storage_->GetNodeIndex(last_multichild_node));
2726       mutable_node->set_log2_num_children(mutable_node->log2_num_children() -
2727                                           1);
2728       // Add the unused second half of the next array to the free list.
2729       storage_->FreeNextArray(next_array_start + next_array_buffer_size / 2,
2730                               mutable_node->log2_num_children());
2731     }
2732   }
2733 
2734   storage_->dec_num_keys();
2735   if (storage_->hdr().num_keys() == 0) {
2736     // Reset the trie state to empty by calling Clear() directly.
2737     //
2738     // Note: in this case, last_multichild_node will be nullptr as well.
2739     // - If we never saw a node with multiple children before deletion, then all
2740     //   the traversed nodes, including the root node, are single-child nodes
2741     //   before deletion.
2742     // - Therefore, after deletion, there should be no valid nodes or nexts in
2743     //   the trie.
2744     Clear();
2745   }
2746 
2747   return true;
2748 }
2749 
ClearPropertyForAllValues(uint32_t property_id)2750 bool IcingDynamicTrie::ClearPropertyForAllValues(uint32_t property_id) {
2751   if (!is_initialized()) {
2752     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2753   }
2754 
2755   PropertyReadersAll readers(*this);
2756   if (!readers.Exists(property_id)) {
2757     ICING_VLOG(1) << "Properties for id " << property_id << " don't exist";
2758     return true;
2759   }
2760 
2761   // Mark values that have no other properties set as as deleted.
2762   uint64_t max_idx =
2763       ValueIndexToPropertyBitmapIndex(storage_->hdr().suffixes_size());
2764   // TODO(vishwajith) Inefficient to do this bit by bit, should be word by
2765   // word. Removing a corpus is likely rare enough that this is low priority.
2766   for (uint64_t i = 0; i < max_idx; ++i) {
2767     // See if the bit is set in our property map.
2768     if (readers.IsPropertyUnique(property_id, i)) {
2769       deleted_bitmap_->SetBit(i, true);
2770     }
2771   }
2772 
2773   // Now delete the bitmap file for this property.
2774   std::unique_ptr<IcingFlashBitmap> bitmap(
2775       std::move(property_bitmaps_[property_id]));
2776   // bitmap cannot be null here, because then readers.Exists(property_id) would
2777   // have returned false earlier, and we wouldn't get here.
2778   if (bitmap == nullptr) {
2779     ICING_LOG(ERROR) << "Property bitmap is null";
2780     return false;
2781   }
2782 
2783   return bitmap->Delete();
2784 }
2785 
Exists() const2786 bool IcingDynamicTrie::PropertyReaderBase::Exists() const {
2787   return bitmap_ != nullptr;
2788 }
2789 
HasProperty(uint32_t value_index) const2790 bool IcingDynamicTrie::PropertyReaderBase::HasProperty(
2791     uint32_t value_index) const {
2792   return bitmap_ &&
2793          bitmap_->GetBit(trie_.ValueIndexToPropertyBitmapIndex(value_index));
2794 }
2795 
PropertyReaderBase(const IcingDynamicTrie & trie,bool deleted,uint32_t property_id)2796 IcingDynamicTrie::PropertyReaderBase::PropertyReaderBase(
2797     const IcingDynamicTrie &trie, bool deleted, uint32_t property_id)
2798     : trie_(trie) {
2799   if (!trie.is_initialized()) {
2800     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2801   }
2802 
2803   if (deleted) {
2804     bitmap_ = trie.deleted_bitmap_.get();
2805   } else if (property_id < trie.property_bitmaps_.size()) {
2806     bitmap_ = trie.property_bitmaps_[property_id].get();
2807   } else {
2808     bitmap_ = nullptr;
2809   }
2810 }
2811 
PropertyReadersAll(const IcingDynamicTrie & trie)2812 IcingDynamicTrie::PropertyReadersAll::PropertyReadersAll(
2813     const IcingDynamicTrie &trie)
2814     : trie_(trie) {
2815   if (!trie.is_initialized()) {
2816     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2817   }
2818 }
2819 
Exists(uint32_t property_id) const2820 bool IcingDynamicTrie::PropertyReadersAll::Exists(uint32_t property_id) const {
2821   return property_id < trie_.property_bitmaps_.size() &&
2822          trie_.property_bitmaps_[property_id];
2823 }
2824 
HasProperty(uint32_t property_id,uint32_t value_index) const2825 bool IcingDynamicTrie::PropertyReadersAll::HasProperty(
2826     uint32_t property_id, uint32_t value_index) const {
2827   return property_id < trie_.property_bitmaps_.size() &&
2828          trie_.property_bitmaps_[property_id] &&
2829          trie_.property_bitmaps_[property_id]->GetBit(
2830              trie_.ValueIndexToPropertyBitmapIndex(value_index));
2831 }
2832 
IsPropertyUnique(uint32_t property_id,uint32_t value_index) const2833 bool IcingDynamicTrie::PropertyReadersAll::IsPropertyUnique(
2834     uint32_t property_id, uint32_t value_index) const {
2835   uint32_t idx = trie_.ValueIndexToPropertyBitmapIndex(value_index);
2836 
2837   // First check that value is set for the requested id.
2838   if (property_id >= trie_.property_bitmaps_.size() ||
2839       !trie_.property_bitmaps_[property_id] ||
2840       !trie_.property_bitmaps_[property_id]->GetBit(idx)) {
2841     return false;
2842   }
2843 
2844   // Now check that the value is not set for the rest.
2845   for (size_t i = 0; i < trie_.property_bitmaps_.size(); ++i) {
2846     if (i == property_id) {
2847       continue;
2848     }
2849     if (trie_.property_bitmaps_[i] && trie_.property_bitmaps_[i]->GetBit(idx)) {
2850       return false;
2851     }
2852   }
2853   return true;
2854 }
2855 
size() const2856 size_t IcingDynamicTrie::PropertyReadersAll::size() const {
2857   return trie_.property_bitmaps_.size();
2858 }
2859 
ValueIndexToPropertyBitmapIndex(uint32_t value_index) const2860 uint64_t IcingDynamicTrie::ValueIndexToPropertyBitmapIndex(
2861     uint32_t value_index) const {
2862   // We know that value indices are separated by at least 1 +
2863   // value_size() bytes (for the null terminator and the value).
2864   return value_index / (value_size() + 1);
2865 }
2866 
2867 // Testing hooks.
GetHeader(IcingDynamicTrieHeader * hdr) const2868 void IcingDynamicTrie::GetHeader(IcingDynamicTrieHeader *hdr) const {
2869   if (!is_initialized()) {
2870     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2871   }
2872 
2873   *hdr = storage_->hdr();
2874 }
2875 
SetHeader(const IcingDynamicTrieHeader & new_hdr)2876 void IcingDynamicTrie::SetHeader(const IcingDynamicTrieHeader &new_hdr) {
2877   if (!is_initialized()) {
2878     ICING_LOG(FATAL) << "DynamicTrie not initialized";
2879   }
2880 
2881   storage_->hdr_.hdr = new_hdr;
2882   storage_->WriteHeader();
2883 }
2884 
2885 }  // namespace lib
2886 }  // namespace icing
2887