1 /*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 * All rights reserved.
4 *
5 * This source code is licensed under the BSD-style license found in the
6 * LICENSE file in the root directory of this source tree.
7 */
8
9 #include <executorch/extension/data_loader/file_data_loader.h>
10
11 #include <algorithm>
12 #include <cerrno>
13 #include <cstddef>
14 #include <cstring>
15 #include <limits>
16
17 #include <fcntl.h>
18 #include <sys/stat.h>
19 #include <sys/types.h>
20 #include <unistd.h>
21
22 #include <executorch/runtime/core/error.h>
23 #include <executorch/runtime/core/result.h>
24 #include <executorch/runtime/platform/log.h>
25
// Some platforms (e.g. Xtensa) do not support pread(), which we use so that
// multiple threads can read the file at different offsets simultaneously
// without affecting each other. We list those platforms below and use a
// workaround for them.
29 #if defined(__xtensa__)
30 #define ET_HAVE_PREAD 0
31 #endif // defined(__xtensa__)
32
33 #ifndef ET_HAVE_PREAD
34 #define ET_HAVE_PREAD 1
35 #endif // !ET_HAVE_PREAD
36
37 using executorch::runtime::Error;
38 using executorch::runtime::FreeableBuffer;
39 using executorch::runtime::Result;
40
41 namespace executorch {
42 namespace extension {
43
44 namespace {
45
/**
 * Reports whether `value` is an exact power of two, i.e. has exactly one bit
 * set. Zero is not considered a power of two.
 */
static bool is_power_of_2(size_t value) {
  if (value == 0) {
    return false;
  }
  // Clearing the lowest set bit leaves nothing behind only when a single bit
  // was set to begin with.
  const size_t without_lowest_bit = value & (value - 1);
  return without_lowest_bit == 0;
}
52
/**
 * Rounds `ptr` up to the nearest address that is a multiple of `alignment`.
 *
 * A pointer that is already a multiple of `alignment` is returned unchanged.
 * The bit arithmetic used here only yields a correctly aligned result when
 * `alignment` is a power of 2; the function does not check this itself.
 */
static uint8_t* align_pointer(void* ptr, size_t alignment) {
  const size_t low_bits_mask = alignment - 1;
  const intptr_t address = reinterpret_cast<intptr_t>(ptr);
  const size_t misalignment = address & low_bits_mask;
  if (misalignment != 0) {
    // Saturate the low bits, then step by one to land exactly on the next
    // alignment boundary.
    const intptr_t bumped = (address | low_bits_mask) + 1;
    return reinterpret_cast<uint8_t*>(bumped);
  }
  // Nothing to adjust.
  return static_cast<uint8_t*>(ptr);
}
66 } // namespace
67
~FileDataLoader()68 FileDataLoader::~FileDataLoader() {
69 // file_name_ can be nullptr if this instance was moved from, but freeing a
70 // null pointer is safe.
71 std::free(const_cast<char*>(file_name_));
72 // fd_ can be -1 if this instance was moved from, but closing a negative fd is
73 // safe (though it will return an error).
74 ::close(fd_);
75 }
76
from(const char * file_name,size_t alignment)77 Result<FileDataLoader> FileDataLoader::from(
78 const char* file_name,
79 size_t alignment) {
80 ET_CHECK_OR_RETURN_ERROR(
81 is_power_of_2(alignment),
82 InvalidArgument,
83 "Alignment %zu is not a power of 2",
84 alignment);
85
86 // Use open() instead of fopen() to avoid the layer of buffering that
87 // fopen() does. We will be reading large portions of the file in one shot,
88 // so buffering does not help.
89 int fd = ::open(file_name, O_RDONLY);
90 if (fd < 0) {
91 ET_LOG(
92 Error, "Failed to open %s: %s (%d)", file_name, strerror(errno), errno);
93 return Error::AccessFailed;
94 }
95
96 // Cache the file size.
97 struct stat st;
98 int err = ::fstat(fd, &st);
99 if (err < 0) {
100 ET_LOG(
101 Error,
102 "Could not get length of %s: %s (%d)",
103 file_name,
104 ::strerror(errno),
105 errno);
106 ::close(fd);
107 return Error::AccessFailed;
108 }
109 size_t file_size = st.st_size;
110
111 // Copy the filename so we can print better debug messages if reads fail.
112 const char* file_name_copy = ::strdup(file_name);
113 if (file_name_copy == nullptr) {
114 ET_LOG(Error, "strdup(%s) failed", file_name);
115 ::close(fd);
116 return Error::MemoryAllocationFailed;
117 }
118
119 return FileDataLoader(fd, file_size, alignment, file_name_copy);
120 }
121
122 namespace {
123 /**
124 * FreeableBuffer::FreeFn-compatible callback.
125 *
126 * `context` is actually a ptrdiff_t value (not a pointer) that contains the
127 * offset in bytes between `data` and the actual pointer to free.
128 */
FreeSegment(void * context,void * data,ET_UNUSED size_t size)129 void FreeSegment(void* context, void* data, ET_UNUSED size_t size) {
130 ptrdiff_t offset = reinterpret_cast<ptrdiff_t>(context);
131 ET_DCHECK_MSG(offset >= 0, "Unexpected offset %ld", (long int)offset);
132 std::free(static_cast<uint8_t*>(data) - offset);
133 }
134 } // namespace
135
load(size_t offset,size_t size,ET_UNUSED const DataLoader::SegmentInfo & segment_info) const136 Result<FreeableBuffer> FileDataLoader::load(
137 size_t offset,
138 size_t size,
139 ET_UNUSED const DataLoader::SegmentInfo& segment_info) const {
140 ET_CHECK_OR_RETURN_ERROR(
141 // Probably had its value moved to another instance.
142 fd_ >= 0,
143 InvalidState,
144 "Uninitialized");
145 ET_CHECK_OR_RETURN_ERROR(
146 offset + size <= file_size_,
147 InvalidArgument,
148 "File %s: offset %zu + size %zu > file_size_ %zu",
149 file_name_,
150 offset,
151 size,
152 file_size_);
153
154 // Don't bother allocating/freeing for empty segments.
155 if (size == 0) {
156 return FreeableBuffer(nullptr, 0, /*free_fn=*/nullptr);
157 }
158
159 // Allocate memory for the FreeableBuffer.
160 size_t alloc_size = size;
161 if (alignment_ > alignof(std::max_align_t)) {
162 // malloc() will align to smaller values, but we must manually align to
163 // larger values.
164 alloc_size += alignment_;
165 }
166 void* buffer = std::malloc(alloc_size);
167 if (buffer == nullptr) {
168 ET_LOG(
169 Error,
170 "Reading from %s at offset %zu: malloc(%zd) failed",
171 file_name_,
172 offset,
173 size);
174 return Error::MemoryAllocationFailed;
175 }
176
177 // Align.
178 void* aligned_buffer = align_pointer(buffer, alignment_);
179
180 // Assert that the alignment didn't overflow the buffer.
181 ET_DCHECK_MSG(
182 reinterpret_cast<uintptr_t>(aligned_buffer) + size <=
183 reinterpret_cast<uintptr_t>(buffer) + alloc_size,
184 "aligned_buffer %p + size %zu > buffer %p + alloc_size %zu",
185 aligned_buffer,
186 size,
187 buffer,
188 alloc_size);
189
190 auto err = load_into(offset, size, segment_info, aligned_buffer);
191 if (err != Error::Ok) {
192 // Free `buffer`, which is what malloc() gave us, not `aligned_buffer`.
193 std::free(buffer);
194 return err;
195 }
196
197 // We can't naively free this pointer, since it may not be what malloc() gave
198 // us. Pass the offset to the real buffer as context. This is the number of
199 // bytes that need to be subtracted from the FreeableBuffer::data() pointer to
200 // find the actual pointer to free.
201 return FreeableBuffer(
202 aligned_buffer,
203 size,
204 FreeSegment,
205 /*free_fn_context=*/
206 reinterpret_cast<void*>(
207 // Using signed types here because it will produce a signed ptrdiff_t
208 // value, though for us it will always be non-negative.
209 reinterpret_cast<intptr_t>(aligned_buffer) -
210 reinterpret_cast<intptr_t>(buffer)));
211 }
212
size() const213 Result<size_t> FileDataLoader::size() const {
214 ET_CHECK_OR_RETURN_ERROR(
215 // Probably had its value moved to another instance.
216 fd_ >= 0,
217 InvalidState,
218 "Uninitialized");
219 return file_size_;
220 }
221
load_into(size_t offset,size_t size,ET_UNUSED const SegmentInfo & segment_info,void * buffer) const222 ET_NODISCARD Error FileDataLoader::load_into(
223 size_t offset,
224 size_t size,
225 ET_UNUSED const SegmentInfo& segment_info,
226 void* buffer) const {
227 ET_CHECK_OR_RETURN_ERROR(
228 // Probably had its value moved to another instance.
229 fd_ >= 0,
230 InvalidState,
231 "Uninitialized");
232 ET_CHECK_OR_RETURN_ERROR(
233 offset + size <= file_size_,
234 InvalidArgument,
235 "File %s: offset %zu + size %zu > file_size_ %zu",
236 file_name_,
237 offset,
238 size,
239 file_size_);
240 ET_CHECK_OR_RETURN_ERROR(
241 buffer != nullptr, InvalidArgument, "Provided buffer cannot be null");
242
243 // Read the data into the aligned address.
244 size_t needed = size;
245 uint8_t* buf = reinterpret_cast<uint8_t*>(buffer);
246
247 // Make a duplicate fd if pread() is not available and we have to seek().
248 // Cannot use the standard dup() or fcntl() calls because the returned
249 // duplicate will share the underlying file record and affect the original fd
250 // when seeking on multiple threads simultaneously.
251 const auto dup_fd = ET_HAVE_PREAD ? fd_ : ::open(file_name_, O_RDONLY);
252
253 while (needed > 0) {
254 // Reads on macOS will fail with EINVAL if size > INT32_MAX.
255 const auto chunk_size = std::min<size_t>(
256 needed, static_cast<size_t>(std::numeric_limits<int32_t>::max()));
257 const auto nread =
258 #if ET_HAVE_PREAD
259 ::pread(dup_fd, buf, chunk_size, offset);
260 #else
261 (::lseek(dup_fd, offset, SEEK_SET) == (off_t)-1)
262 ? -1
263 : ::read(dup_fd, buf, chunk_size);
264 #endif
265 if (nread < 0 && errno == EINTR) {
266 // Interrupted by a signal; zero bytes read.
267 continue;
268 }
269 if (nread <= 0) {
270 // nread == 0 means EOF, which we shouldn't see if we were able to read
271 // the full amount. nread < 0 means an error occurred.
272 ET_LOG(
273 Error,
274 "Reading from %s: failed to read %zu bytes at offset %zu: %s",
275 file_name_,
276 size,
277 offset,
278 nread == 0 ? "EOF" : strerror(errno));
279 if (!ET_HAVE_PREAD) {
280 ::close(dup_fd);
281 }
282 return Error::AccessFailed;
283 }
284 needed -= nread;
285 buf += nread;
286 offset += nread;
287 }
288 if (!ET_HAVE_PREAD) {
289 ::close(dup_fd);
290 }
291 return Error::Ok;
292 }
293
294 } // namespace extension
295 } // namespace executorch
296