1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Copyright 2012 Google Inc. All Rights Reserved. 16 // Author: [email protected] (Ulas Kirazci) 17 // 18 // A disk-backed array. 19 20 #ifndef ICING_LEGACY_INDEX_ICING_ARRAY_STORAGE_H_ 21 #define ICING_LEGACY_INDEX_ICING_ARRAY_STORAGE_H_ 22 23 #include <cstdint> 24 #include <string> 25 #include <vector> 26 27 #include "icing/legacy/index/icing-filesystem.h" 28 #include "icing/legacy/index/icing-mmapper.h" 29 #include "icing/util/crc32.h" 30 31 namespace icing { 32 namespace lib { 33 34 class IcingArrayStorage { 35 public: 36 explicit IcingArrayStorage(const IcingFilesystem &filesystem); 37 ~IcingArrayStorage(); 38 39 // Mmap a disk-backed array at fd_offset in fd. fd is owned by the 40 // caller and must be kept valid. 41 // 42 // If map_shared is true, changes to GetMutableMem immediately apply 43 // to the backing store. Otherwise changes are kept private until an 44 // explicit call to Flush. 45 // 46 // Each element in the array is elt_size bytes and the array is 47 // valid up to num_elts. max_num_elts is the max that the array is 48 // allowed to grow to. 49 // 50 // If crc_ptr is not NULL, explicit calls to UpdateCrc keep the crc 51 // of the array in *crc_ptr. 52 // 53 // If init_crc is true, the crc of the array is recomputed and 54 // written into crc_ptr. Else, the crc of the array is checked 55 // against the current value in crc_ptr and Init fails if the crc 56 // does not match. 57 // 58 // REQUIRES: !is_initialized() 59 bool Init(int fd, size_t fd_offset, bool map_shared, uint32_t elt_size, 60 uint32_t num_elts, uint32_t max_num_elts, uint32_t *crc_ptr, 61 bool init_crc); 62 63 // Undo Init. Make is_initialized() == false. 64 void Reset(); 65 is_initialized()66 bool is_initialized() const { return mmapper_ != nullptr; } 67 68 // Attempt to swap into RAM. 69 void Warm() const; 70 71 // Make array empty again. 72 void Clear(); 73 74 // Intent to write memory at (elt_idx, elt_idx + elt_len). Returns 75 // NULL if file cannot be grown to accommodate that offset. 76 template <class T> GetMutableMem(uint32_t elt_idx,uint32_t elt_len)77 T *GetMutableMem(uint32_t elt_idx, uint32_t elt_len) { 78 return static_cast<T *>(GetMutableMemInternal(elt_idx, elt_len)); 79 } 80 81 // Resizes to first elt_len elements. 82 // REQUIRES: elt_len <= num_elts() 83 void Truncate(uint32_t len); 84 85 // Push changes to crc into crc_ptr. No effect if crc_ptr is NULL. 86 Crc32 UpdateCrc(); 87 88 // Returns the crc of the current content or 0 if crc_ptr is NULL. Does not 89 // modify crc_ptr. 90 Crc32 GetCrc() const; 91 92 // Write and sync dirty pages to fd starting at offset. Returns 93 // number of pages synced. 94 uint32_t Sync(); 95 96 // Accessors. array()97 const uint8_t *array() const { return mmapper_->address(); } 98 template <class T> array_cast()99 const T *array_cast() const { 100 return reinterpret_cast<const T *>(array()); 101 } num_elts()102 uint32_t num_elts() const { return cur_num_; } max_num_elts()103 uint32_t max_num_elts() const { return max_num_; } max_size()104 uint32_t max_size() const { return max_num_elts() * elt_size_; } 105 106 // For stats. num_dirty_pages()107 uint32_t num_dirty_pages() const { 108 uint32_t num = 0; 109 for (size_t i = 0; i < dirty_pages_.size(); i++) { 110 if (dirty_pages_[i]) num++; 111 } 112 return num; 113 } 114 115 private: 116 // We track partial updates to the array for CRC updating. This 117 // requires extra memory to keep track of original buffers but 118 // allows for much faster CRC re-computation. This is the frac limit 119 // of byte len after which we will discard recorded changes and 120 // recompute the entire CRC instead. 121 static const uint32_t kPartialCrcLimitDiv; // 10 means limit is 1/10 122 123 // Grow file by at least this many elts if array is growable. 124 static const size_t kGrowElts; 125 126 // A change record (somebody called GetMutableMem on this 127 // region). We only keep changes <= changes_end_. 128 struct Change { ChangeChange129 Change(uint32_t o, uint32_t l) : elt_offset(o), elt_len(l) {} 130 131 uint32_t elt_offset; 132 uint32_t elt_len; 133 }; 134 static_assert(8 == sizeof(Change), "sizeof(Change) != 8"); 135 static_assert(4 == alignof(Change), "alignof(Change) != 4"); 136 137 void *GetMutableMemInternal(uint32_t elt_idx, uint32_t elt_len); 138 139 bool GrowIfNecessary(uint32_t num_elts); 140 141 int fd_; 142 size_t fd_offset_; 143 bool map_shared_; 144 IcingMMapper *mmapper_; 145 146 // Size of an element in the array. 147 uint32_t elt_size_; 148 149 // In bytes. 150 uint32_t cur_num_; // cur boundary of written elts 151 uint32_t changes_end_; // cur_num_ at last call to UpdateCrc 152 uint32_t max_num_; // size of array in elts 153 uint32_t capacity_num_; // num elts that can be accommodated by file size 154 155 uint32_t *crc_ptr_; 156 157 // Changes that have happened since the last update 158 // (between [0, changes_end_)). 159 std::vector<Change> changes_; 160 std::string saved_orig_buf_; 161 162 // Keep track of all pages we touched so we can write them back to 163 // disk. 164 std::vector<bool> dirty_pages_; 165 166 const IcingFilesystem &filesystem_; 167 }; 168 169 } // namespace lib 170 } // namespace icing 171 172 #endif // ICING_LEGACY_INDEX_ICING_ARRAY_STORAGE_H_ 173