xref: /aosp_15_r20/external/icing/icing/file/posting_list/flash-index-storage.cc (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "icing/file/posting_list/flash-index-storage.h"
16 
17 #include <sys/types.h>
18 #include <unistd.h>
19 
20 #include <algorithm>
21 #include <cerrno>
22 #include <cinttypes>
23 #include <cstdint>
24 #include <memory>
25 
26 #include "icing/text_classifier/lib3/utils/base/status.h"
27 #include "icing/text_classifier/lib3/utils/base/statusor.h"
28 #include "icing/absl_ports/canonical_errors.h"
29 #include "icing/absl_ports/str_cat.h"
30 #include "icing/file/posting_list/index-block.h"
31 #include "icing/file/posting_list/posting-list-common.h"
32 #include "icing/legacy/core/icing-string-util.h"
33 #include "icing/util/logging.h"
34 #include "icing/util/math-util.h"
35 #include "icing/util/status-macros.h"
36 
37 namespace icing {
38 namespace lib {
39 
Create(std::string index_filename,const Filesystem * filesystem,PostingListSerializer * serializer,bool in_memory)40 libtextclassifier3::StatusOr<FlashIndexStorage> FlashIndexStorage::Create(
41     std::string index_filename, const Filesystem* filesystem,
42     PostingListSerializer* serializer, bool in_memory) {
43   ICING_RETURN_ERROR_IF_NULL(filesystem);
44   ICING_RETURN_ERROR_IF_NULL(serializer);
45 
46   FlashIndexStorage storage(filesystem, std::move(index_filename), serializer,
47                             in_memory);
48   if (!storage.Init()) {
49     return absl_ports::InternalError(
50         "Unable to successfully read header block!");
51   }
52   return storage;
53 }
54 
55 /* static */ libtextclassifier3::StatusOr<int>
ReadHeaderMagic(const Filesystem * filesystem,const std::string & index_filename)56 FlashIndexStorage::ReadHeaderMagic(const Filesystem* filesystem,
57                                    const std::string& index_filename) {
58   ICING_RETURN_ERROR_IF_NULL(filesystem);
59 
60   if (!filesystem->FileExists(index_filename.c_str())) {
61     return absl_ports::NotFoundError("Flash index file doesn't exist");
62   }
63 
64   ScopedFd sfd(filesystem->OpenForRead(index_filename.c_str()));
65   if (!sfd.is_valid()) {
66     return absl_ports::InternalError("Fail to open flash index file");
67   }
68 
69   uint32_t block_size = SelectBlockSize();
70   // Read and validate header.
71   ICING_ASSIGN_OR_RETURN(HeaderBlock header_block,
72                          HeaderBlock::Read(filesystem, sfd.get(), block_size));
73   return header_block.header()->magic;
74 }
75 
~FlashIndexStorage()76 FlashIndexStorage::~FlashIndexStorage() {
77   if (header_block_ != nullptr) {
78     libtextclassifier3::Status status = FlushInMemoryFreeList();
79     if (!status.ok()) {
80       ICING_LOG(ERROR) << "Cannot flush in memory free list: "
81                        << status.error_message();
82     }
83     PersistToDisk();
84   }
85 }
86 
SelectBlockSize()87 /* static */ uint32_t FlashIndexStorage::SelectBlockSize() {
88   // This should be close to the flash page size.
89   static constexpr uint32_t kMinBlockSize = 4096;
90 
91   // Determine a good block size.
92   uint32_t page_size = getpagesize();
93   uint32_t block_size = std::max(kMinBlockSize, page_size);
94 
95   // Align up to the nearest page size.
96   return math_util::RoundUpTo(block_size, page_size);
97 }
98 
Init()99 bool FlashIndexStorage::Init() {
100   storage_sfd_ = ScopedFd(filesystem_->OpenForWrite(index_filename_.c_str()));
101   if (!storage_sfd_.is_valid()) {
102     return false;
103   }
104 
105   // Read in or create the header.
106   return InitHeader();
107 }
108 
InitHeader()109 bool FlashIndexStorage::InitHeader() {
110   // Look for an existing file size.
111   int64_t file_size = filesystem_->GetFileSize(storage_sfd_.get());
112   if (file_size == Filesystem::kBadFileSize) {
113     ICING_LOG(ERROR) << "Could not initialize main index. Bad file size.";
114     return false;
115   }
116 
117   if (file_size == 0) {
118     if (!CreateHeader()) {
119       ICING_LOG(ERROR)
120           << "Could not initialize main index. Unable to create header.";
121       return false;
122     }
123   } else {
124     if (!OpenHeader(file_size)) {
125       ICING_LOG(ERROR)
126           << "Could not initialize main index. Unable to open header.";
127       return false;
128     }
129   }
130   in_memory_freelists_.resize(header_block_->header()->num_index_block_infos);
131 
132   return true;
133 }
134 
CreateHeader()135 bool FlashIndexStorage::CreateHeader() {
136   uint32_t block_size = SelectBlockSize();
137   header_block_ = std::make_unique<HeaderBlock>(filesystem_, block_size);
138   // Initialize.
139   header_block_->header()->magic = HeaderBlock::Header::kMagic;
140   header_block_->header()->block_size = block_size;
141   header_block_->header()->last_indexed_docid = kInvalidDocumentId;
142 
143   // Work down from the largest posting list that fits in
144   // block_size. We don't care about locality of blocks because this
145   // is a flash index.
146   for (uint32_t posting_list_bytes = max_posting_list_bytes();
147        posting_list_bytes >= serializer_->GetMinPostingListSize();
148        posting_list_bytes /= 2) {
149     uint32_t aligned_posting_list_bytes =
150         (posting_list_bytes / serializer_->GetDataTypeBytes()) *
151         serializer_->GetDataTypeBytes();
152     ICING_VLOG(1) << "Block size "
153                   << header_block_->header()->num_index_block_infos << ": "
154                   << aligned_posting_list_bytes;
155 
156     // Initialize free list to empty.
157     HeaderBlock::Header::IndexBlockInfo* block_info =
158         header_block_->AddIndexBlockInfo();
159     if (block_info == nullptr) {
160       // This should never happen anyways. Min block size is 4k, so adding these
161       // IndexBlockInfos should never exceed the block size.
162       return false;
163     }
164     block_info->posting_list_bytes = aligned_posting_list_bytes;
165     block_info->free_list_block_index = kInvalidBlockIndex;
166   }
167 
168   // Write the header.
169   if (!header_block_->Write(storage_sfd_.get())) {
170     filesystem_->Truncate(storage_sfd_.get(), 0);
171     return false;
172   }
173   num_blocks_ = 1;
174   return true;
175 }
176 
OpenHeader(int64_t file_size)177 bool FlashIndexStorage::OpenHeader(int64_t file_size) {
178   uint32_t block_size = SelectBlockSize();
179   // Read and validate header.
180   ICING_ASSIGN_OR_RETURN(
181       HeaderBlock read_header,
182       HeaderBlock::Read(filesystem_, storage_sfd_.get(), block_size), false);
183   if (read_header.header()->magic != HeaderBlock::Header::kMagic) {
184     ICING_LOG(ERROR) << "Index header block wrong magic";
185     return false;
186   }
187   if (file_size % read_header.header()->block_size != 0) {
188     ICING_LOG(ERROR) << "Index size " << file_size
189                      << " not a multiple of block size "
190                      << read_header.header()->block_size;
191     return false;
192   }
193 
194   if (file_size < static_cast<int64_t>(read_header.header()->block_size)) {
195     ICING_LOG(ERROR) << "Index size " << file_size
196                      << " shorter than block size "
197                      << read_header.header()->block_size;
198     return false;
199   }
200 
201   if (read_header.header()->block_size % getpagesize() != 0) {
202     ICING_LOG(ERROR) << "Block size " << read_header.header()->block_size
203                      << " is not a multiple of page size " << getpagesize();
204     return false;
205   }
206   num_blocks_ = file_size / read_header.header()->block_size;
207   if (block_size != read_header.header()->block_size) {
208     // The block_size changed? That's weird. But the old block_size is still
209     // valid (it must be some multiple of the new block_size). So reinitialize
210     // with that old block size. Using the old block size means that we can
211     // still use the main index, but reads/writes won't be as efficient in terms
212     // of flash IO because the 'blocks' that we're reading are actually multiple
213     // pages long.
214     ICING_LOG(ERROR) << "Block size of existing header ("
215                      << read_header.header()->block_size
216                      << ") does not match the requested block size ("
217                      << block_size << "). Defaulting to existing block size "
218                      << read_header.header()->block_size;
219     ICING_ASSIGN_OR_RETURN(HeaderBlock read_header,
220                            HeaderBlock::Read(filesystem_, storage_sfd_.get(),
221                                              read_header.header()->block_size),
222                            false);
223   }
224   header_block_ = std::make_unique<HeaderBlock>(std::move(read_header));
225 
226   // Check for memory alignment on posting_list_bytes. See b/29983315.
227   // The issue of potential corruption to the header could also be handled by
228   // checksumming the header block.
229   for (int i = 0; i < header_block_->header()->num_index_block_infos; ++i) {
230     int posting_list_bytes =
231         header_block_->header()->index_block_infos[i].posting_list_bytes;
232     if (posting_list_bytes % serializer_->GetDataTypeBytes() != 0) {
233       ICING_LOG(ERROR)
234           << "Posting list size misaligned, index " << i << ", size "
235           << header_block_->header()->index_block_infos[i].posting_list_bytes
236           << ", data_type_bytes " << serializer_->GetDataTypeBytes()
237           << ", file_size " << file_size;
238       return false;
239     }
240   }
241   return true;
242 }
243 
PersistToDisk()244 bool FlashIndexStorage::PersistToDisk() {
245   // First, write header.
246   if (!header_block_->Write(storage_sfd_.get())) {
247     ICING_LOG(ERROR) << "Write index header failed: " << strerror(errno);
248     return false;
249   }
250 
251   // Then sync.
252   return filesystem_->DataSync(storage_sfd_.get());
253 }
254 
Reset()255 libtextclassifier3::Status FlashIndexStorage::Reset() {
256   // Reset in-memory members to default values.
257   num_blocks_ = 0;
258   header_block_.reset();
259   storage_sfd_.reset();
260   in_memory_freelists_.clear();
261 
262   // Delete the underlying file.
263   if (!filesystem_->DeleteFile(index_filename_.c_str())) {
264     return absl_ports::InternalError(
265         absl_ports::StrCat("Unable to delete file: ", index_filename_));
266   }
267 
268   // Re-initialize.
269   if (!Init()) {
270     return absl_ports::InternalError(
271         "Unable to successfully read header block!");
272   }
273   return libtextclassifier3::Status::OK;
274 }
275 
276 libtextclassifier3::StatusOr<PostingListHolder>
GetPostingList(PostingListIdentifier id) const277 FlashIndexStorage::GetPostingList(PostingListIdentifier id) const {
278   ICING_ASSIGN_OR_RETURN(IndexBlock block, GetIndexBlock(id.block_index()));
279   ICING_ASSIGN_OR_RETURN(
280       IndexBlock::PostingListAndBlockInfo pl_block_info,
281       block.GetAllocatedPostingList(id.posting_list_index()));
282   return PostingListHolder(std::move(pl_block_info.posting_list_used), id,
283                            pl_block_info.next_block_index);
284 }
285 
GetIndexBlock(uint32_t block_index) const286 libtextclassifier3::StatusOr<IndexBlock> FlashIndexStorage::GetIndexBlock(
287     uint32_t block_index) const {
288   if (block_index >= num_blocks_) {
289     return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
290         "Unable to create an index block at index %" PRIu32
291         " when only %d blocks have been allocated.",
292         block_index, num_blocks_));
293   }
294   off_t offset = static_cast<off_t>(block_index) * block_size();
295   return IndexBlock::CreateFromPreexistingIndexBlockRegion(
296       filesystem_, serializer_, storage_sfd_.get(), offset, block_size());
297 }
298 
CreateIndexBlock(uint32_t block_index,uint32_t posting_list_size) const299 libtextclassifier3::StatusOr<IndexBlock> FlashIndexStorage::CreateIndexBlock(
300     uint32_t block_index, uint32_t posting_list_size) const {
301   if (block_index >= num_blocks_) {
302     return absl_ports::OutOfRangeError(IcingStringUtil::StringPrintf(
303         "Unable to create an index block at index %" PRIu32
304         " when only %d blocks have been allocated.",
305         block_index, num_blocks_));
306   }
307   off_t offset = static_cast<off_t>(block_index) * block_size();
308   return IndexBlock::CreateFromUninitializedRegion(
309       filesystem_, serializer_, storage_sfd_.get(), offset, block_size(),
310       posting_list_size);
311 }
312 
FindBestIndexBlockInfo(uint32_t posting_list_bytes) const313 int FlashIndexStorage::FindBestIndexBlockInfo(
314     uint32_t posting_list_bytes) const {
315   int i = header_block_->header()->num_index_block_infos - 1;
316   for (; i >= 0; i--) {
317     if (header_block_->header()->index_block_infos[i].posting_list_bytes >=
318         posting_list_bytes) {
319       return i;
320     }
321   }
322   return i;
323 }
324 
325 libtextclassifier3::StatusOr<PostingListHolder>
GetPostingListFromInMemoryFreeList(int block_info_index)326 FlashIndexStorage::GetPostingListFromInMemoryFreeList(int block_info_index) {
327   // Get something from in memory free list.
328   ICING_ASSIGN_OR_RETURN(PostingListIdentifier posting_list_id,
329                          in_memory_freelists_[block_info_index].TryPop());
330   // Remember, posting lists stored on the in-memory free list were never
331   // actually freed. So it will still contain a valid PostingListUsed. First, we
332   // need to free this posting list.
333   ICING_ASSIGN_OR_RETURN(IndexBlock block,
334                          GetIndexBlock(posting_list_id.block_index()));
335   ICING_RETURN_IF_ERROR(
336       block.FreePostingList(posting_list_id.posting_list_index()));
337 
338   // Now, we can allocate a posting list from the same index block. It may not
339   // be the same posting list that was just freed, but that's okay.
340   ICING_ASSIGN_OR_RETURN(IndexBlock::PostingListAndBlockInfo pl_block_info,
341                          block.AllocatePostingList());
342   posting_list_id = PostingListIdentifier(
343       posting_list_id.block_index(), pl_block_info.posting_list_index,
344       posting_list_id.posting_list_index_bits());
345 
346   return PostingListHolder(std::move(pl_block_info.posting_list_used),
347                            posting_list_id, pl_block_info.next_block_index);
348 }
349 
350 libtextclassifier3::StatusOr<PostingListHolder>
GetPostingListFromOnDiskFreeList(int block_info_index)351 FlashIndexStorage::GetPostingListFromOnDiskFreeList(int block_info_index) {
352   // Get something from the free list.
353   uint32_t block_index = header_block_->header()
354                              ->index_block_infos[block_info_index]
355                              .free_list_block_index;
356   if (block_index == kInvalidBlockIndex) {
357     return absl_ports::NotFoundError("No available entry in free list.");
358   }
359 
360   // Get the index block
361   ICING_ASSIGN_OR_RETURN(IndexBlock block, GetIndexBlock(block_index));
362   ICING_ASSIGN_OR_RETURN(IndexBlock::PostingListAndBlockInfo pl_block_info,
363                          block.AllocatePostingList());
364   PostingListIdentifier posting_list_id =
365       PostingListIdentifier(block_index, pl_block_info.posting_list_index,
366                             block.posting_list_index_bits());
367   if (!pl_block_info.has_free_posting_lists) {
368     ICING_RETURN_IF_ERROR(
369         RemoveFromOnDiskFreeList(block_index, block_info_index, &block));
370   }
371 
372   return PostingListHolder(std::move(pl_block_info.posting_list_used),
373                            posting_list_id, pl_block_info.next_block_index);
374 }
375 
376 libtextclassifier3::StatusOr<PostingListHolder>
AllocateNewPostingList(int block_info_index)377 FlashIndexStorage::AllocateNewPostingList(int block_info_index) {
378   uint32_t block_index = GrowIndex();
379   if (block_index == kInvalidBlockIndex) {
380     return absl_ports::ResourceExhaustedError(
381         "Unable to grow the index further!");
382   }
383   ICING_ASSIGN_OR_RETURN(
384       IndexBlock block,
385       CreateIndexBlock(block_index, header_block_->header()
386                                         ->index_block_infos[block_info_index]
387                                         .posting_list_bytes));
388   ICING_ASSIGN_OR_RETURN(IndexBlock::PostingListAndBlockInfo pl_block_info,
389                          block.AllocatePostingList());
390   PostingListIdentifier posting_list_id =
391       PostingListIdentifier(block_index, pl_block_info.posting_list_index,
392                             block.posting_list_index_bits());
393   if (pl_block_info.has_free_posting_lists) {
394     AddToOnDiskFreeList(block_index, block_info_index, &block);
395   }
396 
397   return PostingListHolder(std::move(pl_block_info.posting_list_used),
398                            posting_list_id, pl_block_info.next_block_index);
399 }
400 
401 libtextclassifier3::StatusOr<PostingListHolder>
AllocatePostingList(uint32_t min_posting_list_bytes)402 FlashIndexStorage::AllocatePostingList(uint32_t min_posting_list_bytes) {
403   int max_pl_size = max_posting_list_bytes();
404   if (min_posting_list_bytes > max_pl_size) {
405     return absl_ports::InvalidArgumentError(IcingStringUtil::StringPrintf(
406         "Requested posting list size %d exceeds max posting list size %d",
407         min_posting_list_bytes, max_pl_size));
408   }
409   int best_block_info_index = FindBestIndexBlockInfo(min_posting_list_bytes);
410 
411   auto holder_or = GetPostingListFromInMemoryFreeList(best_block_info_index);
412   if (holder_or.ok()) {
413     return std::move(holder_or).ValueOrDie();
414   }
415 
416   // Nothing in memory. Look for something in the block file.
417   holder_or = GetPostingListFromOnDiskFreeList(best_block_info_index);
418   if (holder_or.ok()) {
419     return std::move(holder_or).ValueOrDie();
420   }
421 
422   return AllocateNewPostingList(best_block_info_index);
423 }
424 
425 libtextclassifier3::StatusOr<PostingListHolder>
AllocateAndChainMaxSizePostingList(uint32_t prev_block_index)426 FlashIndexStorage::AllocateAndChainMaxSizePostingList(
427     uint32_t prev_block_index) {
428   uint32_t max_pl_size = max_posting_list_bytes();
429   int best_block_info_index = FindBestIndexBlockInfo(max_pl_size);
430 
431   auto holder_or = GetPostingListFromInMemoryFreeList(best_block_info_index);
432   if (!holder_or.ok()) {
433     // Nothing in memory. Look for something in the block file.
434     holder_or = GetPostingListFromOnDiskFreeList(best_block_info_index);
435   }
436 
437   if (!holder_or.ok()) {
438     // Nothing in memory or block file. Allocate new block and posting list.
439     holder_or = AllocateNewPostingList(best_block_info_index);
440   }
441 
442   if (!holder_or.ok()) {
443     return holder_or;
444   }
445 
446   PostingListHolder holder = std::move(holder_or).ValueOrDie();
447   ICING_ASSIGN_OR_RETURN(IndexBlock block,
448                          GetIndexBlock(holder.id.block_index()));
449   ICING_RETURN_IF_ERROR(block.SetNextBlockIndex(prev_block_index));
450   holder.next_block_index = prev_block_index;
451   return holder;
452 }
453 
AddToOnDiskFreeList(uint32_t block_index,int block_info_index,IndexBlock * index_block)454 void FlashIndexStorage::AddToOnDiskFreeList(uint32_t block_index,
455                                             int block_info_index,
456                                             IndexBlock* index_block) {
457   libtextclassifier3::Status status =
458       index_block->SetNextBlockIndex(header_block_->header()
459                                          ->index_block_infos[block_info_index]
460                                          .free_list_block_index);
461   if (!status.ok()) {
462     // If an error occurs, then simply skip this block. It just prevents us from
463     // allocating posting lists from this free block in the future and thus
464     // wastes at most one block, but the entire storage (including the
465     // FlashIndexStorage header) is still valid. Therefore, we can swallow
466     // errors here.
467     ICING_VLOG(1) << "Fail to set next block index to chain blocks with free "
468                      "lists on disk: "
469                   << status.error_message();
470     return;
471   }
472 
473   header_block_->header()
474       ->index_block_infos[block_info_index]
475       .free_list_block_index = block_index;
476 }
477 
RemoveFromOnDiskFreeList(uint32_t block_index,int block_info_index,IndexBlock * index_block)478 libtextclassifier3::Status FlashIndexStorage::RemoveFromOnDiskFreeList(
479     uint32_t block_index, int block_info_index, IndexBlock* index_block) {
480   // Cannot be used anymore. Move free ptr to the next block.
481   ICING_ASSIGN_OR_RETURN(uint32_t next_block_index,
482                          index_block->GetNextBlockIndex());
483   ICING_RETURN_IF_ERROR(index_block->SetNextBlockIndex(kInvalidBlockIndex));
484   header_block_->header()
485       ->index_block_infos[block_info_index]
486       .free_list_block_index = next_block_index;
487   return libtextclassifier3::Status::OK;
488 }
489 
FreePostingList(PostingListHolder && holder)490 libtextclassifier3::Status FlashIndexStorage::FreePostingList(
491     PostingListHolder&& holder) {
492   ICING_ASSIGN_OR_RETURN(IndexBlock block,
493                          GetIndexBlock(holder.id.block_index()));
494   if (block.posting_list_bytes() == max_posting_list_bytes()) {
495     ICING_RETURN_IF_ERROR(block.SetNextBlockIndex(kInvalidBlockIndex));
496   }
497 
498   uint32_t posting_list_bytes = block.posting_list_bytes();
499   int best_block_info_index = FindBestIndexBlockInfo(posting_list_bytes);
500 
501   // It *should* be guaranteed elsewhere that FindBestIndexBlockInfo will not
502   // return a value in >= in_memory_freelists_, but check regardless. If it
503   // doesn't fit for some reason, then put it in the Header free list instead.
504   if (has_in_memory_freelists_ &&
505       best_block_info_index < in_memory_freelists_.size()) {
506     in_memory_freelists_[best_block_info_index].Push(holder.id);
507   } else {
508     ICING_ASSIGN_OR_RETURN(bool was_not_full, block.HasFreePostingLists());
509     ICING_RETURN_IF_ERROR(
510         block.FreePostingList(holder.id.posting_list_index()));
511     // If this block was not already full, then it is already in the free list.
512     if (!was_not_full) {
513       AddToOnDiskFreeList(holder.id.block_index(), best_block_info_index,
514                           &block);
515     }
516   }
517   return libtextclassifier3::Status::OK;
518 }
519 
WritePostingListToDisk(const PostingListHolder & holder)520 libtextclassifier3::Status FlashIndexStorage::WritePostingListToDisk(
521     const PostingListHolder& holder) {
522   ICING_ASSIGN_OR_RETURN(IndexBlock block,
523                          GetIndexBlock(holder.id.block_index()));
524   return block.WritePostingListToDisk(holder.posting_list,
525                                       holder.id.posting_list_index());
526 }
527 
GrowIndex()528 int FlashIndexStorage::GrowIndex() {
529   if (num_blocks_ >= kMaxBlockIndex) {
530     ICING_VLOG(1) << "Reached max block index " << kMaxBlockIndex;
531     return kInvalidBlockIndex;
532   }
533 
534   // Grow the index file.
535   if (!filesystem_->Grow(
536           storage_sfd_.get(),
537           static_cast<uint64_t>(num_blocks_ + 1) * block_size())) {
538     ICING_VLOG(1) << "Error growing index file: " << strerror(errno);
539     return kInvalidBlockIndex;
540   }
541 
542   return num_blocks_++;
543 }
544 
FlushInMemoryFreeList()545 libtextclassifier3::Status FlashIndexStorage::FlushInMemoryFreeList() {
546   for (int i = 0; i < in_memory_freelists_.size(); ++i) {
547     FreeList& freelist = in_memory_freelists_.at(i);
548     auto freelist_elt_or = freelist.TryPop();
549     while (freelist_elt_or.ok()) {
550       PostingListIdentifier freelist_elt = freelist_elt_or.ValueOrDie();
551       // Remember, posting lists stored on the in-memory free list were never
552       // actually freed. So it will still contain a valid PostingListUsed.
553       // First, we need to free this posting list.
554       auto block_or = GetIndexBlock(freelist_elt.block_index());
555       if (!block_or.ok()) {
556         // Can't read the block. Nothing to do here. This posting list will have
557         // to leak. Just proceed to the next freelist element.
558         freelist_elt_or = freelist.TryPop();
559         continue;
560       }
561       IndexBlock block = std::move(block_or).ValueOrDie();
562       ICING_ASSIGN_OR_RETURN(bool was_not_full, block.HasFreePostingLists());
563       ICING_RETURN_IF_ERROR(
564           block.FreePostingList(freelist_elt.posting_list_index()));
565       // If this block was not already full, then it is already in the free
566       // list.
567       if (!was_not_full) {
568         AddToOnDiskFreeList(freelist_elt.block_index(), /*block_info_index=*/i,
569                             &block);
570       }
571       freelist_elt_or = freelist.TryPop();
572     }
573   }
574   return libtextclassifier3::Status::OK;
575 }
576 
GetDebugInfo(DebugInfoVerbosity::Code verbosity,std::string * out) const577 void FlashIndexStorage::GetDebugInfo(DebugInfoVerbosity::Code verbosity,
578                                      std::string* out) const {
579   // Dump and check integrity of the index block free lists.
580   out->append("Free lists:\n");
581   for (size_t i = 0; i < header_block_->header()->num_index_block_infos; ++i) {
582     // TODO(tjbarron) Port over StringAppendFormat to migrate off of this legacy
583     // util.
584     IcingStringUtil::SStringAppendF(
585         out, 100, "Posting list bytes %u: ",
586         header_block_->header()->index_block_infos[i].posting_list_bytes);
587     uint32_t block_index =
588         header_block_->header()->index_block_infos[i].free_list_block_index;
589     int count = 0;
590     while (block_index != kInvalidBlockIndex) {
591       auto block_or = GetIndexBlock(block_index);
592       IcingStringUtil::SStringAppendF(out, 100, "%u ", block_index);
593       ++count;
594 
595       block_index = kInvalidBlockIndex;
596       if (block_or.ok()) {
597         auto block_index_or = block_or.ValueOrDie().GetNextBlockIndex();
598         if (block_index_or.ok()) {
599           block_index = block_index_or.ValueOrDie();
600         }
601       }
602     }
603     IcingStringUtil::SStringAppendF(out, 100, "(count=%d)\n", count);
604   }
605 
606   out->append("In memory free lists:\n");
607   if (in_memory_freelists_.size() ==
608       header_block_->header()->num_index_block_infos) {
609     for (size_t i = 0; i < in_memory_freelists_.size(); ++i) {
610       IcingStringUtil::SStringAppendF(
611           out, 100, "Posting list bytes %u %s\n",
612           header_block_->header()->index_block_infos[i].posting_list_bytes,
613           in_memory_freelists_.at(i).DebugString().c_str());
614     }
615   } else {
616     IcingStringUtil::SStringAppendF(
617         out, 100,
618         "In memory free list size %zu doesn't match index block infos size "
619         "%d\n",
620         in_memory_freelists_.size(),
621         header_block_->header()->num_index_block_infos);
622   }
623 }
624 
625 // FreeList.
Push(PostingListIdentifier id)626 void FlashIndexStorage::FreeList::Push(PostingListIdentifier id) {
627   if (free_list_.size() >= kMaxSize) {
628     ICING_LOG(WARNING)
629         << "Freelist for posting lists of size (block_size / "
630         << (1u << id.posting_list_index_bits())
631         << ") has reached max size. Dropping freed posting list [block_index:"
632         << id.block_index()
633         << ", posting_list_index:" << id.posting_list_index() << "]";
634     ++num_dropped_free_list_entries_;
635     return;
636   }
637 
638   free_list_.push_back(id);
639   free_list_size_high_watermark_ = std::max(
640       free_list_size_high_watermark_, static_cast<int>(free_list_.size()));
641 }
642 
643 libtextclassifier3::StatusOr<PostingListIdentifier>
TryPop()644 FlashIndexStorage::FreeList::TryPop() {
645   if (free_list_.empty()) {
646     return absl_ports::NotFoundError("No available entry in free list.");
647   }
648 
649   PostingListIdentifier id = free_list_.back();
650   free_list_.pop_back();
651   return id;
652 }
653 
DebugString() const654 std::string FlashIndexStorage::FreeList::DebugString() const {
655   return IcingStringUtil::StringPrintf(
656       "size %zu max %d dropped %d", free_list_.size(),
657       free_list_size_high_watermark_, num_dropped_free_list_entries_);
658 }
659 
660 }  // namespace lib
661 }  // namespace icing
662