// xref: /aosp_15_r20/external/executorch/extension/data_loader/file_data_loader.cpp (revision 523fa7a60841cd1ecfb9cc4201f1ca8b03ed023a)
/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include <executorch/extension/data_loader/file_data_loader.h>

#include <algorithm>
#include <cerrno>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <limits>

#include <fcntl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include <executorch/runtime/core/error.h>
#include <executorch/runtime/core/result.h>
#include <executorch/runtime/platform/log.h>

// Some platforms (e.g. Xtensa) do not support pread() that we use to read the
// file at different offsets simultaneously from multiple threads not affecting
// each other. We list them below and use a workaround for them.
//
// A value of ET_HAVE_PREAD supplied by the build (e.g. -DET_HAVE_PREAD=0)
// takes precedence over the per-platform default chosen here.
#ifndef ET_HAVE_PREAD
#if defined(__xtensa__)
#define ET_HAVE_PREAD 0
#else // !defined(__xtensa__)
#define ET_HAVE_PREAD 1
#endif // defined(__xtensa__)
#endif // !ET_HAVE_PREAD

37 using executorch::runtime::Error;
38 using executorch::runtime::FreeableBuffer;
39 using executorch::runtime::Result;
40 
41 namespace executorch {
42 namespace extension {
43 
namespace {

/**
 * Returns true if `value` is an integer power of 2.
 *
 * Zero is not considered a power of 2.
 */
constexpr bool is_power_of_2(size_t value) {
  // A power of 2 has exactly one bit set, so clearing the lowest set bit must
  // leave nothing behind.
  return value != 0 && (value & (value - 1)) == 0;
}

/**
 * Returns `ptr` rounded up to the next address that is a multiple of
 * `alignment`, or `ptr` itself if it is already aligned.
 *
 * `alignment` must be a power of 2; the result is meaningless otherwise.
 */
uint8_t* align_pointer(void* ptr, size_t alignment) {
  // Do the address arithmetic in an unsigned type so that no step depends on
  // signed/unsigned conversions.
  const uintptr_t mask = static_cast<uintptr_t>(alignment) - 1;
  const uintptr_t addr = reinterpret_cast<uintptr_t>(ptr);
  // Adding the mask bumps any unaligned address past the next boundary while
  // leaving aligned addresses below it; clearing the low bits lands exactly on
  // that boundary.
  return reinterpret_cast<uint8_t*>((addr + mask) & ~mask);
}
} // namespace

~FileDataLoader()68 FileDataLoader::~FileDataLoader() {
69   // file_name_ can be nullptr if this instance was moved from, but freeing a
70   // null pointer is safe.
71   std::free(const_cast<char*>(file_name_));
72   // fd_ can be -1 if this instance was moved from, but closing a negative fd is
73   // safe (though it will return an error).
74   ::close(fd_);
75 }
76 
from(const char * file_name,size_t alignment)77 Result<FileDataLoader> FileDataLoader::from(
78     const char* file_name,
79     size_t alignment) {
80   ET_CHECK_OR_RETURN_ERROR(
81       is_power_of_2(alignment),
82       InvalidArgument,
83       "Alignment %zu is not a power of 2",
84       alignment);
85 
86   // Use open() instead of fopen() to avoid the layer of buffering that
87   // fopen() does. We will be reading large portions of the file in one shot,
88   // so buffering does not help.
89   int fd = ::open(file_name, O_RDONLY);
90   if (fd < 0) {
91     ET_LOG(
92         Error, "Failed to open %s: %s (%d)", file_name, strerror(errno), errno);
93     return Error::AccessFailed;
94   }
95 
96   // Cache the file size.
97   struct stat st;
98   int err = ::fstat(fd, &st);
99   if (err < 0) {
100     ET_LOG(
101         Error,
102         "Could not get length of %s: %s (%d)",
103         file_name,
104         ::strerror(errno),
105         errno);
106     ::close(fd);
107     return Error::AccessFailed;
108   }
109   size_t file_size = st.st_size;
110 
111   // Copy the filename so we can print better debug messages if reads fail.
112   const char* file_name_copy = ::strdup(file_name);
113   if (file_name_copy == nullptr) {
114     ET_LOG(Error, "strdup(%s) failed", file_name);
115     ::close(fd);
116     return Error::MemoryAllocationFailed;
117   }
118 
119   return FileDataLoader(fd, file_size, alignment, file_name_copy);
120 }
121 
122 namespace {
123 /**
124  * FreeableBuffer::FreeFn-compatible callback.
125  *
126  * `context` is actually a ptrdiff_t value (not a pointer) that contains the
127  * offset in bytes between `data` and the actual pointer to free.
128  */
FreeSegment(void * context,void * data,ET_UNUSED size_t size)129 void FreeSegment(void* context, void* data, ET_UNUSED size_t size) {
130   ptrdiff_t offset = reinterpret_cast<ptrdiff_t>(context);
131   ET_DCHECK_MSG(offset >= 0, "Unexpected offset %ld", (long int)offset);
132   std::free(static_cast<uint8_t*>(data) - offset);
133 }
134 } // namespace
135 
load(size_t offset,size_t size,ET_UNUSED const DataLoader::SegmentInfo & segment_info) const136 Result<FreeableBuffer> FileDataLoader::load(
137     size_t offset,
138     size_t size,
139     ET_UNUSED const DataLoader::SegmentInfo& segment_info) const {
140   ET_CHECK_OR_RETURN_ERROR(
141       // Probably had its value moved to another instance.
142       fd_ >= 0,
143       InvalidState,
144       "Uninitialized");
145   ET_CHECK_OR_RETURN_ERROR(
146       offset + size <= file_size_,
147       InvalidArgument,
148       "File %s: offset %zu + size %zu > file_size_ %zu",
149       file_name_,
150       offset,
151       size,
152       file_size_);
153 
154   // Don't bother allocating/freeing for empty segments.
155   if (size == 0) {
156     return FreeableBuffer(nullptr, 0, /*free_fn=*/nullptr);
157   }
158 
159   // Allocate memory for the FreeableBuffer.
160   size_t alloc_size = size;
161   if (alignment_ > alignof(std::max_align_t)) {
162     // malloc() will align to smaller values, but we must manually align to
163     // larger values.
164     alloc_size += alignment_;
165   }
166   void* buffer = std::malloc(alloc_size);
167   if (buffer == nullptr) {
168     ET_LOG(
169         Error,
170         "Reading from %s at offset %zu: malloc(%zd) failed",
171         file_name_,
172         offset,
173         size);
174     return Error::MemoryAllocationFailed;
175   }
176 
177   // Align.
178   void* aligned_buffer = align_pointer(buffer, alignment_);
179 
180   // Assert that the alignment didn't overflow the buffer.
181   ET_DCHECK_MSG(
182       reinterpret_cast<uintptr_t>(aligned_buffer) + size <=
183           reinterpret_cast<uintptr_t>(buffer) + alloc_size,
184       "aligned_buffer %p + size %zu > buffer %p + alloc_size %zu",
185       aligned_buffer,
186       size,
187       buffer,
188       alloc_size);
189 
190   auto err = load_into(offset, size, segment_info, aligned_buffer);
191   if (err != Error::Ok) {
192     // Free `buffer`, which is what malloc() gave us, not `aligned_buffer`.
193     std::free(buffer);
194     return err;
195   }
196 
197   // We can't naively free this pointer, since it may not be what malloc() gave
198   // us. Pass the offset to the real buffer as context. This is the number of
199   // bytes that need to be subtracted from the FreeableBuffer::data() pointer to
200   // find the actual pointer to free.
201   return FreeableBuffer(
202       aligned_buffer,
203       size,
204       FreeSegment,
205       /*free_fn_context=*/
206       reinterpret_cast<void*>(
207           // Using signed types here because it will produce a signed ptrdiff_t
208           // value, though for us it will always be non-negative.
209           reinterpret_cast<intptr_t>(aligned_buffer) -
210           reinterpret_cast<intptr_t>(buffer)));
211 }
212 
size() const213 Result<size_t> FileDataLoader::size() const {
214   ET_CHECK_OR_RETURN_ERROR(
215       // Probably had its value moved to another instance.
216       fd_ >= 0,
217       InvalidState,
218       "Uninitialized");
219   return file_size_;
220 }
221 
load_into(size_t offset,size_t size,ET_UNUSED const SegmentInfo & segment_info,void * buffer) const222 ET_NODISCARD Error FileDataLoader::load_into(
223     size_t offset,
224     size_t size,
225     ET_UNUSED const SegmentInfo& segment_info,
226     void* buffer) const {
227   ET_CHECK_OR_RETURN_ERROR(
228       // Probably had its value moved to another instance.
229       fd_ >= 0,
230       InvalidState,
231       "Uninitialized");
232   ET_CHECK_OR_RETURN_ERROR(
233       offset + size <= file_size_,
234       InvalidArgument,
235       "File %s: offset %zu + size %zu > file_size_ %zu",
236       file_name_,
237       offset,
238       size,
239       file_size_);
240   ET_CHECK_OR_RETURN_ERROR(
241       buffer != nullptr, InvalidArgument, "Provided buffer cannot be null");
242 
243   // Read the data into the aligned address.
244   size_t needed = size;
245   uint8_t* buf = reinterpret_cast<uint8_t*>(buffer);
246 
247   // Make a duplicate fd if pread() is not available and we have to seek().
248   // Cannot use the standard dup() or fcntl() calls because the returned
249   // duplicate will share the underlying file record and affect the original fd
250   // when seeking on multiple threads simultaneously.
251   const auto dup_fd = ET_HAVE_PREAD ? fd_ : ::open(file_name_, O_RDONLY);
252 
253   while (needed > 0) {
254     // Reads on macOS will fail with EINVAL if size > INT32_MAX.
255     const auto chunk_size = std::min<size_t>(
256         needed, static_cast<size_t>(std::numeric_limits<int32_t>::max()));
257     const auto nread =
258 #if ET_HAVE_PREAD
259         ::pread(dup_fd, buf, chunk_size, offset);
260 #else
261         (::lseek(dup_fd, offset, SEEK_SET) == (off_t)-1)
262         ? -1
263         : ::read(dup_fd, buf, chunk_size);
264 #endif
265     if (nread < 0 && errno == EINTR) {
266       // Interrupted by a signal; zero bytes read.
267       continue;
268     }
269     if (nread <= 0) {
270       // nread == 0 means EOF, which we shouldn't see if we were able to read
271       // the full amount. nread < 0 means an error occurred.
272       ET_LOG(
273           Error,
274           "Reading from %s: failed to read %zu bytes at offset %zu: %s",
275           file_name_,
276           size,
277           offset,
278           nread == 0 ? "EOF" : strerror(errno));
279       if (!ET_HAVE_PREAD) {
280         ::close(dup_fd);
281       }
282       return Error::AccessFailed;
283     }
284     needed -= nread;
285     buf += nread;
286     offset += nread;
287   }
288   if (!ET_HAVE_PREAD) {
289     ::close(dup_fd);
290   }
291   return Error::Ok;
292 }
293 
} // namespace extension
} // namespace executorch