1 // SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
2 /*
3 * Copyright (C) 2018 HUAWEI, Inc.
4 * http://www.huawei.com/
5 * Created by Li Guifu <[email protected]>
6 */
7 #ifndef _LARGEFILE64_SOURCE
8 #define _LARGEFILE64_SOURCE
9 #endif
10 #ifndef _GNU_SOURCE
11 #define _GNU_SOURCE
12 #endif
13 #include <stdlib.h>
14 #include <sys/stat.h>
15 #include <sys/ioctl.h>
16 #include "erofs/internal.h"
17 #ifdef HAVE_LINUX_FS_H
18 #include <linux/fs.h>
19 #endif
20 #ifdef HAVE_LINUX_FALLOC_H
21 #include <linux/falloc.h>
22 #endif
23 #ifdef HAVE_SYS_STATFS_H
24 #include <sys/statfs.h>
25 #endif
26 #define EROFS_MODNAME "erofs_io"
27 #include "erofs/print.h"
28
erofs_io_fstat(struct erofs_vfile * vf,struct stat * buf)29 int erofs_io_fstat(struct erofs_vfile *vf, struct stat *buf)
30 {
31 if (__erofs_unlikely(cfg.c_dry_run)) {
32 buf->st_size = 0;
33 buf->st_mode = S_IFREG | 0777;
34 return 0;
35 }
36
37 if (vf->ops)
38 return vf->ops->fstat(vf, buf);
39 return fstat(vf->fd, buf);
40 }
41
/*
 * Write @len bytes from @buf to @vf at position @pos.
 *
 * Returns 0 immediately when cfg.c_dry_run is set and delegates to
 * vf->ops->pwrite when an ops table is attached.  Otherwise the data is
 * written to vf->fd at @pos + vf->offset; EINTR is retried and short writes
 * are continued with the remaining bytes only.
 *
 * Return: number of bytes written (smaller than @len only if the underlying
 * write made no progress), or a negative errno on failure.
 */
ssize_t erofs_io_pwrite(struct erofs_vfile *vf, const void *buf,
			u64 pos, size_t len)
{
	const char *p = buf;
	size_t written = 0;

	if (__erofs_unlikely(cfg.c_dry_run))
		return 0;

	if (vf->ops)
		return vf->ops->pwrite(vf, buf, pos, len);

	pos += vf->offset;
	do {
		ssize_t ret;

		/* only ask for what is still missing, never the full @len */
#ifdef HAVE_PWRITE64
		ret = pwrite64(vf->fd, p, len - written, (off64_t)pos);
#else
		ret = pwrite(vf->fd, p, len - written, (off_t)pos);
#endif
		if (ret < 0) {
			/* latch errno before logging can overwrite it */
			int err = errno;

			if (err == EINTR)
				continue;
			erofs_err("failed to write: %s", strerror(err));
			return -err;
		}
		if (!ret)
			break;
		p += ret;
		pos += ret;
		written += ret;
	} while (written < len);

	return written;
}
76
erofs_io_fsync(struct erofs_vfile * vf)77 int erofs_io_fsync(struct erofs_vfile *vf)
78 {
79 int ret;
80
81 if (__erofs_unlikely(cfg.c_dry_run))
82 return 0;
83
84 if (vf->ops)
85 return vf->ops->fsync(vf);
86
87 ret = fsync(vf->fd);
88 if (ret) {
89 erofs_err("failed to fsync(!): %s", strerror(errno));
90 return -errno;
91 }
92 return 0;
93 }
94
/*
 * Deallocate or zero-fill the range [@offset, @offset + @len) of @vf.
 *
 * Returns 0 when cfg.c_dry_run is set and delegates to vf->ops->fallocate
 * when an ops table is attached.  Otherwise, if @zeroout is false and
 * hole punching is available at build time, fallocate(2) with
 * FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE is tried first.  If that is not
 * possible or fails, the range is overwritten with zeroes in chunks of at
 * most EROFS_MAX_BLOCK_SIZE bytes through erofs_io_pwrite().
 *
 * Return: 0 on success, a negative errno propagated from the write path, or
 * -EIO when a write makes no progress or ends short.
 */
ssize_t erofs_io_fallocate(struct erofs_vfile *vf, u64 offset,
			   size_t len, bool zeroout)
{
	static const char zero[EROFS_MAX_BLOCK_SIZE] = {0};
	ssize_t ret;

	if (__erofs_unlikely(cfg.c_dry_run))
		return 0;

	if (vf->ops)
		return vf->ops->fallocate(vf, offset, len, zeroout);

#if defined(HAVE_FALLOCATE) && defined(FALLOC_FL_PUNCH_HOLE)
	if (!zeroout && fallocate(vf->fd, FALLOC_FL_PUNCH_HOLE |
		    FALLOC_FL_KEEP_SIZE, offset + vf->offset, len) >= 0)
		return 0;
#endif
	while (len > EROFS_MAX_BLOCK_SIZE) {
		ret = erofs_io_pwrite(vf, zero, offset, EROFS_MAX_BLOCK_SIZE);
		if (ret < 0)
			return ret;
		/* a write without progress would otherwise loop forever */
		if (!ret)
			return -EIO;
		len -= ret;
		offset += ret;
	}

	ret = erofs_io_pwrite(vf, zero, offset, len);
	if (ret < 0)
		return ret;
	return (size_t)ret == len ? 0 : -EIO;
}
121
/*
 * Set the size of a virtual file to @length bytes.
 *
 * Returns 0 when cfg.c_dry_run is set and delegates to vf->ops->ftruncate
 * when an ops table is attached.  Otherwise vf->offset is added to @length;
 * block devices and files that already have the resulting size are left
 * untouched, everything else is resized with ftruncate(2).
 *
 * Return: 0 on success or a negative errno on failure.
 */
int erofs_io_ftruncate(struct erofs_vfile *vf, u64 length)
{
	struct stat st;

	if (__erofs_unlikely(cfg.c_dry_run))
		return 0;

	if (vf->ops)
		return vf->ops->ftruncate(vf, length);

	if (fstat(vf->fd, &st)) {
		/* latch errno before logging can overwrite it */
		int err = errno;

		erofs_err("failed to fstat: %s", strerror(err));
		return -err;
	}

	length += vf->offset;
	if (S_ISBLK(st.st_mode) || st.st_size == length)
		return 0;

	/* report -errno like every other failure path, not a bare -1 */
	if (ftruncate(vf->fd, length))
		return -errno;
	return 0;
}
143
/*
 * Read up to @len bytes from @vf at position @pos into @buf.
 *
 * Returns 0 immediately when cfg.c_dry_run is set and delegates to
 * vf->ops->pread when an ops table is attached.  Otherwise the data is read
 * from vf->fd at @pos + vf->offset; EINTR is retried and short reads are
 * continued with the remaining bytes only, so @buf is never overrun.
 *
 * Return: number of bytes read (smaller than @len when end of file is hit),
 * or a negative errno on failure.
 */
ssize_t erofs_io_pread(struct erofs_vfile *vf, void *buf, u64 pos, size_t len)
{
	char *p = buf;
	size_t nread = 0;

	if (__erofs_unlikely(cfg.c_dry_run))
		return 0;

	if (vf->ops)
		return vf->ops->pread(vf, buf, pos, len);

	pos += vf->offset;
	do {
		ssize_t ret;

		/* only ask for what is still missing, never the full @len */
#ifdef HAVE_PREAD64
		ret = pread64(vf->fd, p, len - nread, (off64_t)pos);
#else
		ret = pread(vf->fd, p, len - nread, (off_t)pos);
#endif
		if (ret < 0) {
			/* latch errno before logging can overwrite it */
			int err = errno;

			if (err == EINTR)
				continue;
			erofs_err("failed to read: %s", strerror(err));
			return -err;
		}
		if (!ret)	/* end of file */
			break;
		pos += ret;
		p += ret;
		nread += ret;
	} while (nread < len);

	return nread;
}
177
/*
 * Query the size of the block device behind @fd and store it in @bytes.
 *
 * BLKGETSIZE64 is tried first when it is available at build time, then
 * BLKGETSIZE, whose result is scaled by 512 (<< 9) to obtain bytes.
 *
 * Return: 0 on success, otherwise -errno; errno is preset to ENOTSUP so that
 * this value is reported when no ioctl attempt replaces it.
 */
static int erofs_get_bdev_size(int fd, u64 *bytes)
{
#ifdef BLKGETSIZE
	unsigned long nr_units;
#endif

	errno = ENOTSUP;

#ifdef BLKGETSIZE64
	if (ioctl(fd, BLKGETSIZE64, bytes) >= 0)
		return 0;
#endif

#ifdef BLKGETSIZE
	if (ioctl(fd, BLKGETSIZE, &nr_units) >= 0) {
		*bytes = (u64)nr_units << 9;
		return 0;
	}
#endif
	return -errno;
}
197
198 #if defined(__linux__) && !defined(BLKDISCARD)
199 #define BLKDISCARD _IO(0x12, 119)
200 #endif
201
erofs_bdev_discard(int fd,u64 block,u64 count)202 static int erofs_bdev_discard(int fd, u64 block, u64 count)
203 {
204 #ifdef BLKDISCARD
205 u64 range[2] = { block, count };
206
207 return ioctl(fd, BLKDISCARD, &range);
208 #else
209 return -EOPNOTSUPP;
210 #endif
211 }
212
erofs_dev_open(struct erofs_sb_info * sbi,const char * dev,int flags)213 int erofs_dev_open(struct erofs_sb_info *sbi, const char *dev, int flags)
214 {
215 bool ro = (flags & O_ACCMODE) == O_RDONLY;
216 bool truncate = flags & O_TRUNC;
217 struct stat st;
218 int fd, ret;
219
220 #if defined(HAVE_SYS_STATFS_H) && defined(HAVE_FSTATFS)
221 bool again = false;
222
223 repeat:
224 #endif
225 fd = open(dev, (ro ? O_RDONLY : O_RDWR | O_CREAT) | O_BINARY, 0644);
226 if (fd < 0) {
227 erofs_err("failed to open %s: %s", dev, strerror(errno));
228 return -errno;
229 }
230
231 if (ro || !truncate)
232 goto out;
233
234 ret = fstat(fd, &st);
235 if (ret) {
236 erofs_err("failed to fstat(%s): %s", dev, strerror(errno));
237 close(fd);
238 return -errno;
239 }
240
241 switch (st.st_mode & S_IFMT) {
242 case S_IFBLK:
243 ret = erofs_get_bdev_size(fd, &sbi->devsz);
244 if (ret) {
245 erofs_err("failed to get block device size(%s): %s",
246 dev, strerror(errno));
247 close(fd);
248 return ret;
249 }
250 sbi->devsz = round_down(sbi->devsz, erofs_blksiz(sbi));
251 ret = erofs_bdev_discard(fd, 0, sbi->devsz);
252 if (ret)
253 erofs_err("failed to erase block device(%s): %s",
254 dev, erofs_strerror(ret));
255 break;
256 case S_IFREG:
257 if (st.st_size) {
258 #if defined(HAVE_SYS_STATFS_H) && defined(HAVE_FSTATFS)
259 struct statfs stfs;
260
261 if (again) {
262 close(fd);
263 return -ENOTEMPTY;
264 }
265
266 /*
267 * fses like EXT4 and BTRFS will flush dirty blocks
268 * after truncate(0) even after the writeback happens
269 * (see kernel commit 7d8f9f7d150d and ccd2506bd431),
270 * which is NOT our intention. Let's work around this.
271 */
272 if (!fstatfs(fd, &stfs) && (stfs.f_type == 0xEF53 ||
273 stfs.f_type == 0x9123683E)) {
274 close(fd);
275 unlink(dev);
276 again = true;
277 goto repeat;
278 }
279 #endif
280 ret = ftruncate(fd, 0);
281 if (ret) {
282 erofs_err("failed to ftruncate(%s).", dev);
283 close(fd);
284 return -errno;
285 }
286 }
287 sbi->devblksz = st.st_blksize;
288 break;
289 default:
290 erofs_err("bad file type (%s, %o).", dev, st.st_mode);
291 close(fd);
292 return -EINVAL;
293 }
294
295 out:
296 sbi->devname = strdup(dev);
297 if (!sbi->devname) {
298 close(fd);
299 return -ENOMEM;
300 }
301 sbi->bdev.fd = fd;
302 erofs_info("successfully to open %s", dev);
303 return 0;
304 }
305
erofs_dev_close(struct erofs_sb_info * sbi)306 void erofs_dev_close(struct erofs_sb_info *sbi)
307 {
308 if (!sbi->bdev.ops)
309 close(sbi->bdev.fd);
310 free(sbi->devname);
311 sbi->devname = NULL;
312 sbi->bdev.fd = -1;
313 }
314
erofs_blob_closeall(struct erofs_sb_info * sbi)315 void erofs_blob_closeall(struct erofs_sb_info *sbi)
316 {
317 unsigned int i;
318
319 for (i = 0; i < sbi->nblobs; ++i)
320 close(sbi->blobfd[i]);
321 sbi->nblobs = 0;
322 }
323
erofs_blob_open_ro(struct erofs_sb_info * sbi,const char * dev)324 int erofs_blob_open_ro(struct erofs_sb_info *sbi, const char *dev)
325 {
326 int fd = open(dev, O_RDONLY | O_BINARY);
327
328 if (fd < 0) {
329 erofs_err("failed to open(%s).", dev);
330 return -errno;
331 }
332
333 sbi->blobfd[sbi->nblobs] = fd;
334 erofs_info("successfully to open blob%u %s", sbi->nblobs, dev);
335 ++sbi->nblobs;
336 return 0;
337 }
338
/*
 * Read @len bytes at @offset from one of the devices of @sbi into @buf.
 *
 * @device_id 0 selects the primary device (sbi->bdev); ids 1..sbi->nblobs
 * select the blob opened as sbi->blobfd[device_id - 1].  When fewer than
 * @len bytes are available, the rest of @buf is filled with zeroes.
 *
 * Return: 0 on success (not a byte count), -EIO for an invalid device id, or
 * a negative errno propagated from erofs_io_pread().
 */
ssize_t erofs_dev_read(struct erofs_sb_info *sbi, int device_id,
		       void *buf, u64 offset, size_t len)
{
	ssize_t nread;

	if (device_id) {
		struct erofs_vfile blob;

		/*
		 * Blob ids are 1-based, so device_id == nblobs names the last
		 * opened blob and is valid; only larger (or negative) ids are
		 * out of range.
		 */
		if (device_id < 0 || (unsigned int)device_id > sbi->nblobs) {
			erofs_err("invalid device id %d", device_id);
			return -EIO;
		}
		blob = (struct erofs_vfile) {
			.fd = sbi->blobfd[device_id - 1],
		};
		nread = erofs_io_pread(&blob, buf, offset, len);
	} else {
		nread = erofs_io_pread(&sbi->bdev, buf, offset, len);
	}

	if (nread < 0)
		return nread;
	if ((size_t)nread < len) {
		erofs_info("reach EOF of device @ %llu, pading with zeroes",
			   offset | 0ULL);
		memset((char *)buf + nread, 0, len - nread);
	}
	return 0;
}
365
/*
 * Userspace fallback for copy_file_range(): copy up to @length bytes from
 * @fd_in at *@off_in to @fd_out at *@off_out through a stack buffer, using
 * positional reads and writes.  Both offsets are advanced by the amount
 * actually transferred.
 *
 * Return: the number of bytes copied, which may be short when end of input
 * is reached or when an error occurs after some data was already copied;
 * -errno when an error occurs before anything was copied.
 */
static ssize_t __erofs_copy_file_range(int fd_in, u64 *off_in,
				       int fd_out, u64 *off_out,
				       size_t length)
{
	size_t copied = 0;
	char buf[8192];

	/*
	 * Main copying loop.  The buffer size is arbitrary and is a
	 * trade-off between stack size consumption, cache usage, and
	 * amortization of system call overhead.
	 */
	while (length > 0) {
		size_t to_read;
		ssize_t read_count;
		char *end, *p;

		to_read = min_t(size_t, length, sizeof(buf));
#ifdef HAVE_PREAD64
		read_count = pread64(fd_in, buf, to_read, *off_in);
#else
		read_count = pread(fd_in, buf, to_read, *off_in);
#endif
		if (read_count == 0)
			/* End of file reached prematurely. */
			return copied;
		if (read_count < 0) {
			/* Report the number of bytes copied so far. */
			if (copied > 0)
				return copied;
			/* same convention as the syscall path of the caller */
			return -errno;
		}
		*off_in += read_count;

		/* Write the buffer part which was read to the destination. */
		end = buf + read_count;
		for (p = buf; p < end; ) {
			ssize_t write_count;

#ifdef HAVE_PWRITE64
			write_count = pwrite64(fd_out, p, end - p, *off_out);
#else
			write_count = pwrite(fd_out, p, end - p, *off_out);
#endif
			if (write_count < 0) {
				/*
				 * Adjust the input read position to match what
				 * we have written, so that the caller can pick
				 * up after the error.
				 */
				size_t written = p - buf;
				/*
				 * NB: This needs to be signed so that we can
				 * form the negative value below.
				 */
				ssize_t overread = read_count - written;

				*off_in -= overread;
				/* Report the number of bytes copied so far. */
				if (copied + written > 0)
					return copied + written;
				return -errno;
			}
			p += write_count;
			*off_out += write_count;
		} /* Write loop.  */
		copied += read_count;
		length -= read_count;
	}
	return copied;
}
437
/*
 * Copy up to @length bytes from @fd_in at *@off_in to @fd_out at *@off_out.
 *
 * When copy_file_range() is available at build time it is tried first; its
 * result (byte count, or -errno on failure) is returned and both offsets are
 * written back, unless it fails with ENOSYS or EXDEV.  In that case, and when
 * the syscall is not available at all, the userspace fallback
 * __erofs_copy_file_range() does the work.
 */
ssize_t erofs_copy_file_range(int fd_in, u64 *off_in, int fd_out, u64 *off_out,
			      size_t length)
{
#ifdef HAVE_COPY_FILE_RANGE
	off64_t in64 = *off_in, out64 = *off_out;
	const ssize_t nr = copy_file_range(fd_in, &in64, fd_out, &out64,
					   length, 0);
	/* only "not implemented" and "cross-device" are worth a retry */
	const bool use_fallback = nr < 0 &&
				  (errno == ENOSYS || errno == EXDEV);

	if (!use_fallback) {
		const ssize_t res = nr < 0 ? -errno : nr;

		*off_in = in64;
		*off_out = out64;
		return res;
	}
#endif
	return __erofs_copy_file_range(fd_in, off_in, fd_out, off_out, length);
}
459
erofs_io_read(struct erofs_vfile * vf,void * buf,size_t bytes)460 ssize_t erofs_io_read(struct erofs_vfile *vf, void *buf, size_t bytes)
461 {
462 ssize_t i = 0;
463
464 if (vf->ops)
465 return vf->ops->read(vf, buf, bytes);
466
467 while (bytes) {
468 int len = bytes > INT_MAX ? INT_MAX : bytes;
469 int ret;
470
471 ret = read(vf->fd, buf + i, len);
472 if (ret < 1) {
473 if (ret == 0) {
474 break;
475 } else if (errno != EINTR) {
476 erofs_err("failed to read : %s",
477 strerror(errno));
478 return -errno;
479 }
480 }
481 bytes -= ret;
482 i += ret;
483 }
484 return i;
485 }
486
487 #ifdef HAVE_SYS_SENDFILE_H
488 #include <sys/sendfile.h>
489 #endif
490
/*
 * Reposition the file offset of a virtual file.
 *
 * Without an ops table this is lseek(2) on vf->fd; otherwise the lseek hook
 * of the ops table is called with the same arguments.
 */
off_t erofs_io_lseek(struct erofs_vfile *vf, u64 offset, int whence)
{
	if (!vf->ops)
		return lseek(vf->fd, offset, whence);

	return vf->ops->lseek(vf, offset, whence);
}
498
erofs_io_xcopy(struct erofs_vfile * vout,off_t pos,struct erofs_vfile * vin,unsigned int len,bool noseek)499 int erofs_io_xcopy(struct erofs_vfile *vout, off_t pos,
500 struct erofs_vfile *vin, unsigned int len, bool noseek)
501 {
502 if (vout->ops)
503 return vout->ops->xcopy(vout, pos, vin, len, noseek);
504
505 if (len && !vin->ops) {
506 off_t ret __maybe_unused;
507
508 #ifdef HAVE_COPY_FILE_RANGE
509 ret = copy_file_range(vin->fd, NULL, vout->fd, &pos, len, 0);
510 if (ret > 0)
511 len -= ret;
512 #endif
513 #if defined(HAVE_SYS_SENDFILE_H) && defined(HAVE_SENDFILE)
514 if (len && !noseek) {
515 ret = lseek(vout->fd, pos, SEEK_SET);
516 if (ret == pos) {
517 ret = sendfile(vout->fd, vin->fd, NULL, len);
518 if (ret > 0) {
519 pos += ret;
520 len -= ret;
521 }
522 }
523 }
524 #endif
525 }
526
527 do {
528 char buf[32768];
529 int ret = min_t(unsigned int, len, sizeof(buf));
530
531 ret = erofs_io_read(vin, buf, ret);
532 if (ret < 0)
533 return ret;
534 if (ret > 0) {
535 ret = erofs_io_pwrite(vout, buf, pos, ret);
536 if (ret < 0)
537 return ret;
538 pos += ret;
539 }
540 len -= ret;
541 } while (len);
542 return 0;
543 }
544