xref: /aosp_15_r20/external/icing/icing/file/memory-mapped-file.h (revision 8b6cd535a057e39b3b86660c4aa06c99747c2136)
1 // Copyright (C) 2019 Google LLC
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 //      http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 // Allows memory-mapping a full file or a specific region within the file.
16 // It also supports efficiently switching the region being mapped.
17 //
18 // Note on Performance:
19 // It supports different optimized strategies for common patterns on both
20 // read-only and read-write files. This includes using read-ahead buffers for
21 // faster reads as well as background-sync vs manual-sync of changes to disk.
22 // For more details, see comments at MemoryMappedFile::Strategy.
23 //
// ** Usage 1: pre-mmap large memory and grow the underlying file internally **
//
// // Create MemoryMappedFile instance. Create() returns the object by value
// // (wrapped in a StatusOr), not a std::unique_ptr.
// ICING_ASSIGN_OR_RETURN(
//     MemoryMappedFile mmapped_file,
//     MemoryMappedFile::Create(filesystem, "/file.pb",
//                              MemoryMappedFile::READ_WRITE_AUTO_SYNC,
//                              max_file_size,
//                              /*pre_mapping_file_offset=*/0,
//                              /*pre_mapping_mmap_size=*/1024 * 1024));
//
// // Found that we need 4K bytes for the file and mmapped region.
// mmapped_file.GrowAndRemapIfNecessary(
//     /*new_file_offset=*/0, /*new_mmap_size=*/4 * 1024);
// char read_byte = mmapped_file.region()[4000];
// mmapped_file.mutable_region()[4001] = write_byte;
//
// mmapped_file.PersistToDisk();  // Optional; immediately writes changes to
//                                // disk.
//
// // Found that we need 2048 * 1024 bytes for the file and mmapped region.
// mmapped_file.GrowAndRemapIfNecessary(
//     /*new_file_offset=*/0, /*new_mmap_size=*/2048 * 1024);
// mmapped_file.mutable_region()[2000 * 1024] = write_byte;
// mmapped_file.Unmap();  // Optional; the destructor also unmaps.
49 //
// ** Usage 2: load by segments **
//
// // Create() returns the object by value (wrapped in a StatusOr), not a
// // std::unique_ptr.
// ICING_ASSIGN_OR_RETURN(
//     MemoryMappedFile mmapped_file,
//     MemoryMappedFile::Create(filesystem, "/file.pb",
//                              MemoryMappedFile::READ_WRITE_AUTO_SYNC,
//                              max_file_size,
//                              /*pre_mapping_file_offset=*/0,
//                              /*pre_mapping_mmap_size=*/16 * 1024));
//
// // Load the first 16K.
// mmapped_file.GrowAndRemapIfNecessary(
//     /*new_file_offset=*/0, /*new_mmap_size=*/16 * 1024);
// char read_byte = mmapped_file.region()[100];
// mmapped_file.mutable_region()[10] = write_byte;
//
// mmapped_file.PersistToDisk();  // Optional; immediately writes changes to
//                                // disk.
//
// // Load the next 16K.
// mmapped_file.GrowAndRemapIfNecessary(
//     /*new_file_offset=*/16 * 1024, /*new_mmap_size=*/16 * 1024);
// mmapped_file.mutable_region()[10] = write_byte;
// mmapped_file.Unmap();  // Optional; the destructor also unmaps.
74 
75 #ifndef ICING_FILE_MEMORY_MAPPED_FILE_H_
76 #define ICING_FILE_MEMORY_MAPPED_FILE_H_
77 
78 #include <unistd.h>
79 
80 #include <algorithm>
81 #include <cstdint>
82 #include <string>
83 #include <string_view>
84 
85 #include "icing/text_classifier/lib3/utils/base/status.h"
86 #include "icing/text_classifier/lib3/utils/base/statusor.h"
87 #include "icing/file/filesystem.h"
88 
89 namespace icing {
90 namespace lib {
91 
92 class MemoryMappedFile {
93  public:
system_page_size()94   static int64_t __attribute__((const)) system_page_size() {
95     static const int64_t page_size =
96         static_cast<int64_t>(sysconf(_SC_PAGE_SIZE));
97     return page_size;
98   }
99 
100   enum Strategy {
101     // Memory map a read-only file into a read-only memory region.
102     READ_ONLY,
103 
104     // Memory map a read-write file into a writable memory region. Any changes
105     // made to the region are automatically flushed to the underlying file in
106     // the background.
107     READ_WRITE_AUTO_SYNC,
108 
109     // Memory map a read-write file into a writable memory region. Changes made
110     // to this region will never be auto-synced to the underlying file. Unless
111     // the caller explicitly calls PersistToDisk(), all changes will be lost
112     // when the MemoryMappedFile is destroyed.
113     READ_WRITE_MANUAL_SYNC,
114   };
115 
116   // Absolute max file size, 16 GiB.
117   static constexpr int64_t kMaxFileSize = INT64_C(1) << 34;
118 
119   // Default max file size, 1 MiB.
120   static constexpr int64_t kDefaultMaxFileSize = INT64_C(1) << 20;
121 
122   // Creates a new MemoryMappedFile to read/write content to.
123   //
124   // filesystem    : Object to make system level calls
125   // file_path     : Full path of the file that needs to be memory-mapped.
126   // mmap_strategy : Strategy/optimizations to access the content.
127   // max_file_size : Maximum file size for MemoryMappedFile, default
128   //                 kDefaultMaxFileSize.
129   //
130   // Returns:
131   //   A MemoryMappedFile instance on success
132   //   OUT_OF_RANGE_ERROR if max_file_size is invalid
133   //   INTERNAL_ERROR on I/O error
134   static libtextclassifier3::StatusOr<MemoryMappedFile> Create(
135       const Filesystem& filesystem, std::string_view file_path,
136       Strategy mmap_strategy, int64_t max_file_size = kDefaultMaxFileSize);
137 
138   // Creates a new MemoryMappedFile to read/write content to. It remaps when
139   // creating the instance, but doesn't check or grow the actual file size, so
140   // the caller should call GrowAndRemapIfNecessary before accessing region.
141   //
142   // filesystem    : Object to make system level calls
143   // file_path     : Full path of the file that needs to be memory-mapped.
144   // mmap_strategy : Strategy/optimizations to access the content.
145   // max_file_size : Maximum file size for MemoryMappedFile.
146   // pre_mapping_file_offset : The offset of the file to be memory mapped.
147   // pre_mapping_mmap_size   : mmap size for pre-mapping.
148   //
149   // Returns:
150   //   A MemoryMappedFile instance on success
151   //   OUT_OF_RANGE_ERROR if max_file_size, file_offset, or mmap_size is invalid
152   //   INTERNAL_ERROR on I/O error
153   static libtextclassifier3::StatusOr<MemoryMappedFile> Create(
154       const Filesystem& filesystem, std::string_view file_path,
155       Strategy mmap_strategy, int64_t max_file_size,
156       int64_t pre_mapping_file_offset, int64_t pre_mapping_mmap_size);
157 
158   // Delete copy constructor and assignment operator.
159   MemoryMappedFile(const MemoryMappedFile& other) = delete;
160   MemoryMappedFile& operator=(const MemoryMappedFile& other) = delete;
161 
162   MemoryMappedFile(MemoryMappedFile&& other);
163   MemoryMappedFile& operator=(MemoryMappedFile&& other);
164 
165   // Frees any region that is still memory-mapped region.
166   ~MemoryMappedFile();
167 
168   // TODO(b/247671531): migrate all callers to use GrowAndRemapIfNecessary and
169   // deprecate this API.
170   //
171   // Memory-map the newly specified region within the file specified by
172   // file_offset and mmap_size. Unmaps any previously mmapped region.
173   // It doesn't handle the underlying file growth.
174   //
175   // Returns any encountered IO error.
176   libtextclassifier3::Status Remap(int64_t file_offset, int64_t mmap_size);
177 
178   // Attempt to memory-map the newly specified region within the file specified
179   // by new_file_offset and new_mmap_size. It handles mmap and file growth
180   // intelligently.
181   // - Compute least file size needed according to new_file_offset and
182   //   new_mmap_size, and compare with the current file size. If requiring file
183   //   growth, then grow the underlying file (Write) or return error if
184   //   strategy_ is READ_ONLY.
185   // - If new_file_offset is different from the current file_offset_ or
186   //   new_mmap_size is greater than the current mmap_size_, then memory-map
187   //   the newly specified region and unmap any previously mmapped region.
188   //
189   // This API is useful for file growth since it grows the underlying file
190   // internally and handles remapping intelligently. By pre-mmapping a large
191   // memory, we only need to grow the underlying file (Write) without remapping
192   // in each round of growth, which significantly reduces the cost of system
193   // call and memory paging after remap.
194   //
195   // Returns:
196   //   OK on success
197   //   OUT_OF_RANGE_ERROR if new_file_offset and new_mmap_size is invalid
198   //   Any error from GrowFileSize() and RemapImpl()
199   libtextclassifier3::Status GrowAndRemapIfNecessary(int64_t new_file_offset,
200                                                      int64_t new_mmap_size);
201 
202   // unmap and free-up the region that has currently been memory mapped.
203   void Unmap();
204 
205   // Explicitly persist any changes made to the currently mapped region to disk.
206   //
207   // NOTE: This is only valid if Strategy=READ_WRITE was used.
208   //
209   // Returns:
210   //   OK on success
211   //   INTERNAL on I/O error
212   //   FAILED_PRECONDITION if Strategy is not implemented
213   libtextclassifier3::Status PersistToDisk();
214 
215   // Advise the system to help it optimize the memory-mapped region for
216   // upcoming read/write operations.
217   //
218   // NOTE: See linux documentation of madvise() for additional details.
219   enum AccessPattern {
220     // Future memory access are expected to be in random order. So, readhead
221     // will have limited impact on latency.
222     ACCESS_RANDOM,
223 
224     // Future memory access are expected to be sequential. So, some readahead
225     // can greatly improve latency.
226     ACCESS_SEQUENTIAL,
227 
228     // Future memory access is expected to be high-volume and all over the file.
229     // So, preloading the whole region into memory would greatly improve
230     // latency.
231     ACCESS_ALL,
232 
233     // Future memory access is expected to be rare. So, it is best to free up
234     // as much of preloaded memory as possible.
235     ACCESS_NONE,
236   };
237   libtextclassifier3::Status OptimizeFor(AccessPattern access_pattern);
238 
strategy()239   Strategy strategy() const { return strategy_; }
240 
max_file_size()241   int64_t max_file_size() const { return max_file_size_; }
242 
243   // Accessors to the memory-mapped region. Returns null if nothing is mapped.
region()244   const char* region() const {
245     return reinterpret_cast<const char*>(mmap_result_) + alignment_adjustment_;
246   }
mutable_region()247   char* mutable_region() {
248     return reinterpret_cast<char*>(mmap_result_) + alignment_adjustment_;
249   }
250 
file_offset()251   int64_t file_offset() const { return file_offset_; }
252 
253   // TODO(b/247671531): remove this API after migrating all callers to use
254   //                    GrowAndRemapIfNecessary.
region_size()255   int64_t region_size() const { return mmap_size_; }
256 
257   // The size that is safe for the client to read/write. This is only valid for
258   // callers that use GrowAndRemapIfNecessary.
available_size()259   int64_t available_size() const {
260     return std::min(mmap_size_,
261                     std::max(INT64_C(0), file_size_ - file_offset_));
262   }
263 
264  private:
265   explicit MemoryMappedFile(const Filesystem& filesystem,
266                             std::string_view file_path, Strategy mmap_strategy,
267                             int64_t max_file_size, int64_t file_size);
268 
269   // Grow the underlying file to new_file_size.
270   // Note: it is possible that Write() (implemented in the file system call
271   // library) grows the underlying file partially and returns error due to
272   // failures, so the cached file_size_ may contain out-of-date value, but it is
273   // still guaranteed that file_size_ is always smaller or equal to the actual
274   // file size. In the next round of growing:
275   // - If new_file_size is not greater than file_size_, then we're still
276   //   confident that the actual file size is large enough and therefore skip
277   //   the grow process.
278   // - If new_file_size is greater than file_size_, then we will invoke the
279   //   system call to sync the actual file size. At this moment, file_size_ is
280   //   the actual file size and therefore we can grow the underlying file size
281   //   correctly.
282   //
283   // Returns:
284   //   OK on success
285   //   FAILED_PRECONDITION_ERROR if requiring file growth and strategy_ is
286   //                             READ_ONLY
287   //   OUT_OF_RANGE_ERROR if new_mmap_size exceeds max_file_size_
288   //   INTERNAL_ERROR on I/O error
289   libtextclassifier3::Status GrowFileSize(int64_t new_file_size);
290 
291   // Memory-map the newly specified region within the file specified by
292   // new_file_offset and new_mmap_size. Unmaps any previously mmapped region.
293   // It doesn't handle the underlying file growth.
294   //
295   // Returns:
296   //   OK on success
297   //   OUT_OF_RANGE_ERROR if new_file_offset and new_mmap_size is invalid
298   //   INTERNAL_ERROR on I/O error
299   libtextclassifier3::Status RemapImpl(int64_t new_file_offset,
300                                        int64_t new_mmap_size);
301 
302   // Swaps the contents of this with other.
303   void Swap(MemoryMappedFile* other);
304 
adjusted_offset()305   int64_t adjusted_offset() const {
306     return file_offset_ - alignment_adjustment_;
307   }
308 
adjusted_mmap_size()309   int64_t adjusted_mmap_size() const {
310     return alignment_adjustment_ + mmap_size_;
311   }
312 
313   // Cached constructor params.
314   const Filesystem* filesystem_;
315   std::string file_path_;
316   Strategy strategy_ = Strategy::READ_WRITE_AUTO_SYNC;
317 
318   // Raw file related fields:
319   // - max_file_size_
320   // - file_size_
321 
322   // Max file size for MemoryMappedFile. It should not exceed the absolute max
323   // size of memory mapped file (kMaxFileSize). It is only used in
324   // GrowAndRemapIfNecessary(), the new API that handles underlying file growth
325   // internally and remaps intelligently.
326   //
327   // Note: max_file_size_ will be specified in runtime and the caller should
328   // make sure its value is correct and reasonable.
329   int64_t max_file_size_ = 0;
330 
331   // Cached file size to avoid calling system call too frequently. It is only
332   // used in GrowAndRemapIfNecessary(), the new API that handles underlying file
333   // growth internally and remaps intelligently.
334   //
335   // Note: it is guaranteed that file_size_ is smaller or equal to the actual
336   // file size as long as the underlying file hasn't been truncated or deleted
337   // externally. See GrowFileSize() for more details.
338   int64_t file_size_ = 0;
339 
340   // Memory mapped related fields:
341   // - mmap_result_
342   // - file_offset_
343   // - alignment_adjustment_
344   // - mmap_size_
345 
346   // Raw pointer (or error) returned by calls to mmap().
347   void* mmap_result_ = nullptr;
348 
349   // Offset within the file at which the current memory-mapped region starts.
350   int64_t file_offset_ = 0;
351 
352   // Size that is currently memory-mapped.
353   // Note that the mmapped size can be larger than the underlying file size. We
354   // can reduce remapping by pre-mmapping a large memory and grow the file size
355   // later. See GrowAndRemapIfNecessary().
356   int64_t mmap_size_ = 0;
357 
358   // The difference between file_offset_ and the actual adjusted (aligned)
359   // offset.
360   // Since mmap requires the offset to be a multiple of system page size, we
361   // have to align file_offset_ to the last multiple of system page size.
362   int64_t alignment_adjustment_ = 0;
363 
364   // E.g. system_page_size = 5, RemapImpl(/*new_file_offset=*/8, mmap_size)
365   //
366   // File layout:               xxxxx xxxxx xxxxx xxxxx xxxxx xx
367   // file_offset_:                       8
368   // adjusted_offset():               5
369   // region()/mutable_region():          |
370   // mmap_result_:                    |
371   //
372   // alignment_adjustment_: file_offset_ - adjusted_offset()
373   // mmap_size_:            mmap_size
374   // region_size():         mmap_size_
375   // available_size():      std::min(mmap_size_,
376   //                                 std::max(0, file_size_ - file_offset_))
377   // region_range:          [file_offset_, file_offset + mmap_size)
378   // adjusted_mmap_size():  alignment_adjustment_ + mmap_size_
379   // adjusted_mmap_range:   [alignment_offset, file_offset + mmap_size)
380 };
381 
382 }  // namespace lib
383 }  // namespace icing
384 
385 #endif  // ICING_FILE_MEMORY_MAPPED_FILE_H_
386