1 // Copyright (C) 2019 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Allows memory-mapping a full file or a specific region within the file. 16 // It also supports efficiently switching the region being mapped. 17 // 18 // Note on Performance: 19 // It supports different optimized strategies for common patterns on both 20 // read-only and read-write files. This includes using read-ahead buffers for 21 // faster reads as well as background-sync vs manual-sync of changes to disk. 22 // For more details, see comments at MemoryMappedFile::Strategy. 23 // 24 // ** Usage 1: pre-mmap large memory and grow the underlying file internally ** 25 // 26 // // Create MemoryMappedFile instance. 27 // ICING_ASSIGN_OR_RETURN( 28 // std::unique_ptr<MemoryMappedFile> mmapped_file, 29 // MemoryMappedFile::Create(filesystem, "/file.pb", 30 // READ_WRITE_AUTO_SYNC, 31 // max_file_size, 32 // /*pre_mapping_file_offset=*/0, 33 // /*pre_mapping_mmap_size=*/1024 * 1024)); 34 // 35 // // Found that we need 4K bytes for the file and mmapped region. 36 // mmapped_file->GrowAndRemapIfNecessary( 37 // /*new_file_offset=*/0, /*new_mmap_size=*/4 * 1024); 38 // char read_byte = mmapped_file->region()[4000]; 39 // mmapped_file->mutable_region()[4001] = write_byte; 40 // 41 // mmapped_file->PersistToDisk(); // Optional; immediately writes changes to 42 // disk. 43 // 44 // // Found that we need 2048 * 1024 bytes for the file and mmapped region. 45 // mmapped_file->GrowAndRemapIfNecessary( 46 // /*new_file_offset=*/0, /*new_mmap_size=*/2048 * 1024); 47 // mmapped_file->mutable_region()[2000 * 1024] = write_byte; 48 // mmapped_file.reset(); 49 // 50 // ** Usage 2: load by segments ** 51 // 52 // ICING_ASSIGN_OR_RETURN( 53 // std::unique_ptr<MemoryMappedFile> mmapped_file, 54 // MemoryMappedFile::Create(filesystem, "/file.pb", 55 // READ_WRITE_AUTO_SYNC, 56 // max_file_size, 57 // /*pre_mapping_file_offset=*/0, 58 // /*pre_mapping_mmap_size=*/16 * 1024)); 59 // 60 // // load the first 16K. 61 // mmapped_file->GrowAndRemapIfNecessary( 62 // /*new_file_offset=*/0, /*new_mmap_size=*/16 * 1024); 63 // char read_byte = mmapped_file->region()[100]; 64 // mmapped_file->mutable_region()[10] = write_byte; 65 // 66 // mmapped_file->PersistToDisk(); // Optional; immediately writes changes to 67 // disk. 68 // 69 // // load the next 16K. 70 // mmapped_file->GrowAndRemapIfNecessary( 71 // /*new_file_offset=*/16 * 1024, /*new_mmap_size=*/16 * 1024); 72 // mmapped_file->mutable_region()[10] = write_byte; 73 // mmapped_file.reset(); 74 75 #ifndef ICING_FILE_MEMORY_MAPPED_FILE_H_ 76 #define ICING_FILE_MEMORY_MAPPED_FILE_H_ 77 78 #include <unistd.h> 79 80 #include <algorithm> 81 #include <cstdint> 82 #include <string> 83 #include <string_view> 84 85 #include "icing/text_classifier/lib3/utils/base/status.h" 86 #include "icing/text_classifier/lib3/utils/base/statusor.h" 87 #include "icing/file/filesystem.h" 88 89 namespace icing { 90 namespace lib { 91 92 class MemoryMappedFile { 93 public: system_page_size()94 static int64_t __attribute__((const)) system_page_size() { 95 static const int64_t page_size = 96 static_cast<int64_t>(sysconf(_SC_PAGE_SIZE)); 97 return page_size; 98 } 99 100 enum Strategy { 101 // Memory map a read-only file into a read-only memory region. 102 READ_ONLY, 103 104 // Memory map a read-write file into a writable memory region. Any changes 105 // made to the region are automatically flushed to the underlying file in 106 // the background. 107 READ_WRITE_AUTO_SYNC, 108 109 // Memory map a read-write file into a writable memory region. Changes made 110 // to this region will never be auto-synced to the underlying file. Unless 111 // the caller explicitly calls PersistToDisk(), all changes will be lost 112 // when the MemoryMappedFile is destroyed. 113 READ_WRITE_MANUAL_SYNC, 114 }; 115 116 // Absolute max file size, 16 GiB. 117 static constexpr int64_t kMaxFileSize = INT64_C(1) << 34; 118 119 // Default max file size, 1 MiB. 120 static constexpr int64_t kDefaultMaxFileSize = INT64_C(1) << 20; 121 122 // Creates a new MemoryMappedFile to read/write content to. 123 // 124 // filesystem : Object to make system level calls 125 // file_path : Full path of the file that needs to be memory-mapped. 126 // mmap_strategy : Strategy/optimizations to access the content. 127 // max_file_size : Maximum file size for MemoryMappedFile, default 128 // kDefaultMaxFileSize. 129 // 130 // Returns: 131 // A MemoryMappedFile instance on success 132 // OUT_OF_RANGE_ERROR if max_file_size is invalid 133 // INTERNAL_ERROR on I/O error 134 static libtextclassifier3::StatusOr<MemoryMappedFile> Create( 135 const Filesystem& filesystem, std::string_view file_path, 136 Strategy mmap_strategy, int64_t max_file_size = kDefaultMaxFileSize); 137 138 // Creates a new MemoryMappedFile to read/write content to. It remaps when 139 // creating the instance, but doesn't check or grow the actual file size, so 140 // the caller should call GrowAndRemapIfNecessary before accessing region. 141 // 142 // filesystem : Object to make system level calls 143 // file_path : Full path of the file that needs to be memory-mapped. 144 // mmap_strategy : Strategy/optimizations to access the content. 145 // max_file_size : Maximum file size for MemoryMappedFile. 146 // pre_mapping_file_offset : The offset of the file to be memory mapped. 147 // pre_mapping_mmap_size : mmap size for pre-mapping. 148 // 149 // Returns: 150 // A MemoryMappedFile instance on success 151 // OUT_OF_RANGE_ERROR if max_file_size, file_offset, or mmap_size is invalid 152 // INTERNAL_ERROR on I/O error 153 static libtextclassifier3::StatusOr<MemoryMappedFile> Create( 154 const Filesystem& filesystem, std::string_view file_path, 155 Strategy mmap_strategy, int64_t max_file_size, 156 int64_t pre_mapping_file_offset, int64_t pre_mapping_mmap_size); 157 158 // Delete copy constructor and assignment operator. 159 MemoryMappedFile(const MemoryMappedFile& other) = delete; 160 MemoryMappedFile& operator=(const MemoryMappedFile& other) = delete; 161 162 MemoryMappedFile(MemoryMappedFile&& other); 163 MemoryMappedFile& operator=(MemoryMappedFile&& other); 164 165 // Frees any region that is still memory-mapped region. 166 ~MemoryMappedFile(); 167 168 // TODO(b/247671531): migrate all callers to use GrowAndRemapIfNecessary and 169 // deprecate this API. 170 // 171 // Memory-map the newly specified region within the file specified by 172 // file_offset and mmap_size. Unmaps any previously mmapped region. 173 // It doesn't handle the underlying file growth. 174 // 175 // Returns any encountered IO error. 176 libtextclassifier3::Status Remap(int64_t file_offset, int64_t mmap_size); 177 178 // Attempt to memory-map the newly specified region within the file specified 179 // by new_file_offset and new_mmap_size. It handles mmap and file growth 180 // intelligently. 181 // - Compute least file size needed according to new_file_offset and 182 // new_mmap_size, and compare with the current file size. If requiring file 183 // growth, then grow the underlying file (Write) or return error if 184 // strategy_ is READ_ONLY. 185 // - If new_file_offset is different from the current file_offset_ or 186 // new_mmap_size is greater than the current mmap_size_, then memory-map 187 // the newly specified region and unmap any previously mmapped region. 188 // 189 // This API is useful for file growth since it grows the underlying file 190 // internally and handles remapping intelligently. By pre-mmapping a large 191 // memory, we only need to grow the underlying file (Write) without remapping 192 // in each round of growth, which significantly reduces the cost of system 193 // call and memory paging after remap. 194 // 195 // Returns: 196 // OK on success 197 // OUT_OF_RANGE_ERROR if new_file_offset and new_mmap_size is invalid 198 // Any error from GrowFileSize() and RemapImpl() 199 libtextclassifier3::Status GrowAndRemapIfNecessary(int64_t new_file_offset, 200 int64_t new_mmap_size); 201 202 // unmap and free-up the region that has currently been memory mapped. 203 void Unmap(); 204 205 // Explicitly persist any changes made to the currently mapped region to disk. 206 // 207 // NOTE: This is only valid if Strategy=READ_WRITE was used. 208 // 209 // Returns: 210 // OK on success 211 // INTERNAL on I/O error 212 // FAILED_PRECONDITION if Strategy is not implemented 213 libtextclassifier3::Status PersistToDisk(); 214 215 // Advise the system to help it optimize the memory-mapped region for 216 // upcoming read/write operations. 217 // 218 // NOTE: See linux documentation of madvise() for additional details. 219 enum AccessPattern { 220 // Future memory access are expected to be in random order. So, readhead 221 // will have limited impact on latency. 222 ACCESS_RANDOM, 223 224 // Future memory access are expected to be sequential. So, some readahead 225 // can greatly improve latency. 226 ACCESS_SEQUENTIAL, 227 228 // Future memory access is expected to be high-volume and all over the file. 229 // So, preloading the whole region into memory would greatly improve 230 // latency. 231 ACCESS_ALL, 232 233 // Future memory access is expected to be rare. So, it is best to free up 234 // as much of preloaded memory as possible. 235 ACCESS_NONE, 236 }; 237 libtextclassifier3::Status OptimizeFor(AccessPattern access_pattern); 238 strategy()239 Strategy strategy() const { return strategy_; } 240 max_file_size()241 int64_t max_file_size() const { return max_file_size_; } 242 243 // Accessors to the memory-mapped region. Returns null if nothing is mapped. region()244 const char* region() const { 245 return reinterpret_cast<const char*>(mmap_result_) + alignment_adjustment_; 246 } mutable_region()247 char* mutable_region() { 248 return reinterpret_cast<char*>(mmap_result_) + alignment_adjustment_; 249 } 250 file_offset()251 int64_t file_offset() const { return file_offset_; } 252 253 // TODO(b/247671531): remove this API after migrating all callers to use 254 // GrowAndRemapIfNecessary. region_size()255 int64_t region_size() const { return mmap_size_; } 256 257 // The size that is safe for the client to read/write. This is only valid for 258 // callers that use GrowAndRemapIfNecessary. available_size()259 int64_t available_size() const { 260 return std::min(mmap_size_, 261 std::max(INT64_C(0), file_size_ - file_offset_)); 262 } 263 264 private: 265 explicit MemoryMappedFile(const Filesystem& filesystem, 266 std::string_view file_path, Strategy mmap_strategy, 267 int64_t max_file_size, int64_t file_size); 268 269 // Grow the underlying file to new_file_size. 270 // Note: it is possible that Write() (implemented in the file system call 271 // library) grows the underlying file partially and returns error due to 272 // failures, so the cached file_size_ may contain out-of-date value, but it is 273 // still guaranteed that file_size_ is always smaller or equal to the actual 274 // file size. In the next round of growing: 275 // - If new_file_size is not greater than file_size_, then we're still 276 // confident that the actual file size is large enough and therefore skip 277 // the grow process. 278 // - If new_file_size is greater than file_size_, then we will invoke the 279 // system call to sync the actual file size. At this moment, file_size_ is 280 // the actual file size and therefore we can grow the underlying file size 281 // correctly. 282 // 283 // Returns: 284 // OK on success 285 // FAILED_PRECONDITION_ERROR if requiring file growth and strategy_ is 286 // READ_ONLY 287 // OUT_OF_RANGE_ERROR if new_mmap_size exceeds max_file_size_ 288 // INTERNAL_ERROR on I/O error 289 libtextclassifier3::Status GrowFileSize(int64_t new_file_size); 290 291 // Memory-map the newly specified region within the file specified by 292 // new_file_offset and new_mmap_size. Unmaps any previously mmapped region. 293 // It doesn't handle the underlying file growth. 294 // 295 // Returns: 296 // OK on success 297 // OUT_OF_RANGE_ERROR if new_file_offset and new_mmap_size is invalid 298 // INTERNAL_ERROR on I/O error 299 libtextclassifier3::Status RemapImpl(int64_t new_file_offset, 300 int64_t new_mmap_size); 301 302 // Swaps the contents of this with other. 303 void Swap(MemoryMappedFile* other); 304 adjusted_offset()305 int64_t adjusted_offset() const { 306 return file_offset_ - alignment_adjustment_; 307 } 308 adjusted_mmap_size()309 int64_t adjusted_mmap_size() const { 310 return alignment_adjustment_ + mmap_size_; 311 } 312 313 // Cached constructor params. 314 const Filesystem* filesystem_; 315 std::string file_path_; 316 Strategy strategy_ = Strategy::READ_WRITE_AUTO_SYNC; 317 318 // Raw file related fields: 319 // - max_file_size_ 320 // - file_size_ 321 322 // Max file size for MemoryMappedFile. It should not exceed the absolute max 323 // size of memory mapped file (kMaxFileSize). It is only used in 324 // GrowAndRemapIfNecessary(), the new API that handles underlying file growth 325 // internally and remaps intelligently. 326 // 327 // Note: max_file_size_ will be specified in runtime and the caller should 328 // make sure its value is correct and reasonable. 329 int64_t max_file_size_ = 0; 330 331 // Cached file size to avoid calling system call too frequently. It is only 332 // used in GrowAndRemapIfNecessary(), the new API that handles underlying file 333 // growth internally and remaps intelligently. 334 // 335 // Note: it is guaranteed that file_size_ is smaller or equal to the actual 336 // file size as long as the underlying file hasn't been truncated or deleted 337 // externally. See GrowFileSize() for more details. 338 int64_t file_size_ = 0; 339 340 // Memory mapped related fields: 341 // - mmap_result_ 342 // - file_offset_ 343 // - alignment_adjustment_ 344 // - mmap_size_ 345 346 // Raw pointer (or error) returned by calls to mmap(). 347 void* mmap_result_ = nullptr; 348 349 // Offset within the file at which the current memory-mapped region starts. 350 int64_t file_offset_ = 0; 351 352 // Size that is currently memory-mapped. 353 // Note that the mmapped size can be larger than the underlying file size. We 354 // can reduce remapping by pre-mmapping a large memory and grow the file size 355 // later. See GrowAndRemapIfNecessary(). 356 int64_t mmap_size_ = 0; 357 358 // The difference between file_offset_ and the actual adjusted (aligned) 359 // offset. 360 // Since mmap requires the offset to be a multiple of system page size, we 361 // have to align file_offset_ to the last multiple of system page size. 362 int64_t alignment_adjustment_ = 0; 363 364 // E.g. system_page_size = 5, RemapImpl(/*new_file_offset=*/8, mmap_size) 365 // 366 // File layout: xxxxx xxxxx xxxxx xxxxx xxxxx xx 367 // file_offset_: 8 368 // adjusted_offset(): 5 369 // region()/mutable_region(): | 370 // mmap_result_: | 371 // 372 // alignment_adjustment_: file_offset_ - adjusted_offset() 373 // mmap_size_: mmap_size 374 // region_size(): mmap_size_ 375 // available_size(): std::min(mmap_size_, 376 // std::max(0, file_size_ - file_offset_)) 377 // region_range: [file_offset_, file_offset + mmap_size) 378 // adjusted_mmap_size(): alignment_adjustment_ + mmap_size_ 379 // adjusted_mmap_range: [alignment_offset, file_offset + mmap_size) 380 }; 381 382 } // namespace lib 383 } // namespace icing 384 385 #endif // ICING_FILE_MEMORY_MAPPED_FILE_H_ 386