/* SPDX-License-Identifier: MIT */
/*
 * Description: run various read tests, verifying data
 *
 */
#include <errno.h>
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <assert.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <linux/fs.h>

#include "helpers.h"
#include "liburing.h"

#define FSIZE 128*1024*1024
#define CHUNK_SIZE 131072
#define PUNCH_SIZE 32768

/*
 * 8 because it fits within the on-stack iov, 16 because it's larger than 8
 */
#define MIN_VECS 8
#define MAX_VECS 16

/*
 * Can be anything, let's just do something for a bit of parallelism
 */
#define READ_BATCH 16

/*
 * Each offset in the file has the offset / sizeof(int) stored for every
 * sizeof(int) address.
 */
static int verify_buf(void *buf, size_t size, off_t off)
{
	int i, u_in_buf = size / sizeof(unsigned int);
	unsigned int *ptr;

	off /= sizeof(unsigned int);
	ptr = buf;
	for (i = 0; i < u_in_buf; i++) {
		if (off != *ptr) {
			fprintf(stderr, "Found %u, wanted %lu\n", *ptr, (unsigned long) off);
			return 1;
		}
		ptr++;
		off++;
	}

	return 0;
}

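/*
 * Read at the end of the file with part of the tail dropped from the page
 * cache: write a known pattern into the last half chunk, make sure it is
 * cached, punch out part of it with POSIX_FADV_DONTNEED, then issue one
 * CHUNK_SIZE read that extends past EOF. It must complete as a short read
 * of exactly CHUNK_SIZE / 2 bytes of verified data.
 */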
static int test_truncate(struct io_uring *ring, const char *fname, int buffered,
			 int vectored, int provide_buf)
{
	struct io_uring_cqe *cqe;
	struct io_uring_sqe *sqe;
	struct iovec vec;
	struct stat sb;
	off_t punch_off, off, file_size;
	void *buf = NULL;
	int u_in_buf, i, ret, fd, first_pass = 1;
	unsigned int *ptr;

	if (buffered)
		fd = open(fname, O_RDWR);
	else
		fd = open(fname, O_DIRECT | O_RDWR);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	if (fstat(fd, &sb) < 0) {
		perror("stat");
		close(fd);
		return 1;
	}

	if (S_ISREG(sb.st_mode)) {
		file_size = sb.st_size;
	} else if (S_ISBLK(sb.st_mode)) {
		unsigned long long bytes;

		if (ioctl(fd, BLKGETSIZE64, &bytes) < 0) {
			perror("ioctl");
			close(fd);
			return 1;
		}
		file_size = bytes;
	} else {
		goto out;
	}

	if (file_size < CHUNK_SIZE)
		goto out;

	t_posix_memalign(&buf, 4096, CHUNK_SIZE);

	off = file_size - (CHUNK_SIZE / 2);
	punch_off = off + CHUNK_SIZE / 4;

	u_in_buf = CHUNK_SIZE / sizeof(unsigned int);
	ptr = buf;
	for (i = 0; i < u_in_buf; i++) {
		*ptr = i;
		ptr++;
	}
	ret = pwrite(fd, buf, CHUNK_SIZE / 2, off);
	if (ret < 0) {
		perror("pwrite");
		goto err;
	} else if (ret != CHUNK_SIZE / 2)
		goto out;

again:
	/*
	 * Read in last bit of file so it's known cached, then remove half of that
	 * last bit so we get a short read that needs retry
	 */
	ret = pread(fd, buf, CHUNK_SIZE / 2, off);
	if (ret < 0) {
		perror("pread");
		goto err;
	} else if (ret != CHUNK_SIZE / 2)
		goto out;

	if (posix_fadvise(fd, punch_off, CHUNK_SIZE / 4, POSIX_FADV_DONTNEED) < 0) {
		perror("posix_fadvise");
		goto err;
	}

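	/*
	 * When testing provided buffers, hand the single buffer to buffer
	 * group 0 first so the read below can select it.
	 */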
	if (provide_buf) {
		sqe = io_uring_get_sqe(ring);
		io_uring_prep_provide_buffers(sqe, buf, CHUNK_SIZE, 1, 0, 0);
		ret = io_uring_submit(ring);
		if (ret != 1) {
			fprintf(stderr, "submit failed %d\n", ret);
			goto err;
		}
		ret = io_uring_wait_cqe(ring, &cqe);
		if (ret < 0) {
			fprintf(stderr, "wait completion %d\n", ret);
			goto err;
		}
		ret = cqe->res;
		io_uring_cqe_seen(ring, cqe);
		if (ret) {
			fprintf(stderr, "Provide buffer failed %d\n", ret);
			goto err;
		}
	}

	sqe = io_uring_get_sqe(ring);
	if (!sqe) {
		fprintf(stderr, "get sqe failed\n");
		goto err;
	}

	if (vectored) {
		assert(!provide_buf);
		vec.iov_base = buf;
		vec.iov_len = CHUNK_SIZE;
		io_uring_prep_readv(sqe, fd, &vec, 1, off);
	} else {
		if (provide_buf) {
			io_uring_prep_read(sqe, fd, NULL, CHUNK_SIZE, off);
			sqe->flags |= IOSQE_BUFFER_SELECT;
		} else {
			io_uring_prep_read(sqe, fd, buf, CHUNK_SIZE, off);
		}
	}
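	/* clear the buffer so stale pattern data can't mask a failed read */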
	memset(buf, 0, CHUNK_SIZE);

	ret = io_uring_submit(ring);
	if (ret != 1) {
		fprintf(stderr, "Submit failed %d\n", ret);
		goto err;
	}

	ret = io_uring_wait_cqe(ring, &cqe);
	if (ret < 0) {
		fprintf(stderr, "wait completion %d\n", ret);
		goto err;
	}

	ret = cqe->res;
	io_uring_cqe_seen(ring, cqe);
	if (ret != CHUNK_SIZE / 2) {
		fprintf(stderr, "Unexpected truncated read %d\n", ret);
		goto err;
	}

	if (verify_buf(buf, CHUNK_SIZE / 2, 0))
		goto err;

	/*
	 * Repeat, but punch first part instead of last
	 */
	if (first_pass) {
		punch_off = file_size - CHUNK_SIZE / 4;
		first_pass = 0;
		goto again;
	}

out:
	free(buf);
	close(fd);
	return 0;
err:
	free(buf);
	close(fd);
	return 1;
}

enum {
	PUNCH_NONE,
	PUNCH_FRONT,
	PUNCH_MIDDLE,
	PUNCH_END,
};

/*
 * For each chunk in file, DONTNEED a start, end, or middle segment of it.
 * We enter here with the file fully cached every time, either freshly
 * written or after other reads. This forces (at least) the buffered reads
 * to be handled incrementally, exercising that path.
 */
static int do_punch(int fd)
{
	off_t offset = 0;
	int punch_type;

	while (offset + CHUNK_SIZE <= FSIZE) {
		off_t punch_off;

		punch_type = rand() % (PUNCH_END + 1);
		switch (punch_type) {
		default:
		case PUNCH_NONE:
			punch_off = -1; /* gcc... */
			break;
		case PUNCH_FRONT:
			punch_off = offset;
			break;
		case PUNCH_MIDDLE:
			punch_off = offset + PUNCH_SIZE;
			break;
		case PUNCH_END:
			punch_off = offset + CHUNK_SIZE - PUNCH_SIZE;
			break;
		}

		offset += CHUNK_SIZE;
		if (punch_type == PUNCH_NONE)
			continue;
		if (posix_fadvise(fd, punch_off, PUNCH_SIZE, POSIX_FADV_DONTNEED) < 0) {
			perror("posix_fadvise");
			return 1;
		}
	}

	return 0;
}

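/*
 * Provide READ_BATCH buffers of CHUNK_SIZE each to buffer group 0, one
 * buffer ID per batch slot, for reads using IOSQE_BUFFER_SELECT.
 */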
static int provide_buffers(struct io_uring *ring, void **buf)
{
	struct io_uring_cqe *cqe;
	struct io_uring_sqe *sqe;
	int i, ret;

	/* real use case would have one buffer chopped up, but... */
	for (i = 0; i < READ_BATCH; i++) {
		sqe = io_uring_get_sqe(ring);
		io_uring_prep_provide_buffers(sqe, buf[i], CHUNK_SIZE, 1, 0, i);
	}

	ret = io_uring_submit(ring);
	if (ret != READ_BATCH) {
		fprintf(stderr, "Submit failed %d\n", ret);
		return 1;
	}

	for (i = 0; i < READ_BATCH; i++) {
		ret = io_uring_wait_cqe(ring, &cqe);
		if (ret) {
			fprintf(stderr, "wait cqe %d\n", ret);
			return 1;
		}
		if (cqe->res < 0) {
			fprintf(stderr, "cqe res provide %d\n", cqe->res);
			return 1;
		}
		io_uring_cqe_seen(ring, cqe);
	}

	return 0;
}

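/*
 * Main read verification: punch random parts of the file out of the page
 * cache, then read the whole file back in READ_BATCH deep batches of
 * CHUNK_SIZE reads, using plain, vectored, registered (fixed), or provided
 * buffers, and verify every chunk against the pattern from fill_pattern().
 */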
static int test(struct io_uring *ring, const char *fname, int buffered,
		int vectored, int small_vecs, int registered, int provide)
{
	struct iovec vecs[READ_BATCH][MAX_VECS];
	struct io_uring_cqe *cqe;
	struct io_uring_sqe *sqe;
	void *buf[READ_BATCH];
	int ret, fd, flags;
	int i, j, nr_vecs;
	off_t off, voff;
	size_t left;

	if (registered) {
		assert(!provide);
		assert(!vectored && !small_vecs);
	}
	if (provide) {
		assert(!registered);
		assert(!vectored && !small_vecs);
	}

	flags = O_RDONLY;
	if (!buffered)
		flags |= O_DIRECT;
	fd = open(fname, flags);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	if (do_punch(fd))
		return 1;

	if (vectored) {
		if (small_vecs)
			nr_vecs = MIN_VECS;
		else
			nr_vecs = MAX_VECS;

		for (j = 0; j < READ_BATCH; j++) {
			for (i = 0; i < nr_vecs; i++) {
				void *ptr;

				t_posix_memalign(&ptr, 4096, CHUNK_SIZE / nr_vecs);
				vecs[j][i].iov_base = ptr;
				vecs[j][i].iov_len = CHUNK_SIZE / nr_vecs;
			}
		}
	} else {
		for (j = 0; j < READ_BATCH; j++)
			t_posix_memalign(&buf[j], 4096, CHUNK_SIZE);
		nr_vecs = 0;
	}

	if (registered) {
		struct iovec v[READ_BATCH];

		for (i = 0; i < READ_BATCH; i++) {
			v[i].iov_base = buf[i];
			v[i].iov_len = CHUNK_SIZE;
		}
		ret = io_uring_register_buffers(ring, v, READ_BATCH);
		if (ret) {
			fprintf(stderr, "Error buffer reg %d\n", ret);
			goto err;
		}
	}

	i = 0;
	left = FSIZE;
	off = 0;
	while (left) {
		int pending = 0;

		if (provide && provide_buffers(ring, buf))
			goto err;

		for (i = 0; i < READ_BATCH; i++) {
			size_t this = left;

			if (this > CHUNK_SIZE)
				this = CHUNK_SIZE;

			sqe = io_uring_get_sqe(ring);
			if (!sqe) {
				fprintf(stderr, "get sqe failed\n");
				goto err;
			}

			if (vectored) {
				io_uring_prep_readv(sqe, fd, vecs[i], nr_vecs, off);
			} else {
				if (registered) {
					io_uring_prep_read_fixed(sqe, fd, buf[i], this, off, i);
				} else if (provide) {
					io_uring_prep_read(sqe, fd, NULL, this, off);
					sqe->flags |= IOSQE_BUFFER_SELECT;
				} else {
					io_uring_prep_read(sqe, fd, buf[i], this, off);
				}
			}
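			/* stash the file offset in the upper 32 bits, batch index in the lower */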
			sqe->user_data = ((uint64_t)off << 32) | i;
			off += this;
			left -= this;
			pending++;
			if (!left)
				break;
		}

		ret = io_uring_submit(ring);
		if (ret != pending) {
			fprintf(stderr, "sqe submit failed: %d\n", ret);
			goto err;
		}

		for (i = 0; i < pending; i++) {
			int index;

			ret = io_uring_wait_cqe(ring, &cqe);
			if (ret < 0) {
				fprintf(stderr, "wait completion %d\n", ret);
				goto err;
			}
			if (cqe->res < 0) {
				fprintf(stderr, "bad read %d, read %d\n", cqe->res, i);
				goto err;
			}
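			/*
			 * With provided buffers the kernel picks the buffer;
			 * its ID is in the upper bits of cqe->flags.
			 */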
			if (cqe->flags & IORING_CQE_F_BUFFER)
				index = cqe->flags >> 16;
			else
				index = cqe->user_data & 0xffffffff;
			voff = cqe->user_data >> 32;
			io_uring_cqe_seen(ring, cqe);
			if (vectored) {
				for (j = 0; j < nr_vecs; j++) {
					void *buf = vecs[index][j].iov_base;
					size_t len = vecs[index][j].iov_len;

					if (verify_buf(buf, len, voff))
						goto err;
					voff += len;
				}
			} else {
				if (verify_buf(buf[index], CHUNK_SIZE, voff))
					goto err;
			}
		}
	}

	ret = 0;
done:
	if (registered)
		io_uring_unregister_buffers(ring);
	if (vectored) {
		for (j = 0; j < READ_BATCH; j++)
			for (i = 0; i < nr_vecs; i++)
				free(vecs[j][i].iov_base);
	} else {
		for (j = 0; j < READ_BATCH; j++)
			free(buf[j]);
	}
	close(fd);
	return ret;
err:
	ret = 1;
	goto done;
}

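/*
 * Write the verification pattern: every sizeof(unsigned int) aligned offset
 * in the file holds offset / sizeof(unsigned int), which is what
 * verify_buf() checks reads against.
 */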
static int fill_pattern(const char *fname)
{
	size_t left = FSIZE;
	unsigned int val, *ptr;
	void *buf;
	int fd, i;

	fd = open(fname, O_WRONLY);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	val = 0;
	buf = t_malloc(4096);
	while (left) {
		int u_in_buf = 4096 / sizeof(val);
		size_t this = left;

		if (this > 4096)
			this = 4096;
		ptr = buf;
		for (i = 0; i < u_in_buf; i++) {
			*ptr = val;
			val++;
			ptr++;
		}
		if (write(fd, buf, 4096) != 4096) {
			close(fd);
			free(buf);
			return 1;
		}
		left -= 4096;
	}

	fsync(fd);
	close(fd);
	free(buf);
	return 0;
}

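/*
 * Run the buffered and O_DIRECT variants of the batched read tests and the
 * truncated read tests, either against a file given on the command line or
 * against a freshly created FSIZE test file.
 */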
int main(int argc, char *argv[])
{
	struct io_uring ring;
	const char *fname;
	char buf[32];
	int ret;

	srand(getpid());

	if (argc > 1) {
		fname = argv[1];
	} else {
		sprintf(buf, ".file-verify.%d", getpid());
		fname = buf;
		t_create_file(fname, FSIZE);
	}

	ret = io_uring_queue_init(READ_BATCH, &ring, 0);
	if (ret) {
		fprintf(stderr, "ring setup failed: %d\n", ret);
		goto err;
	}

	if (fill_pattern(fname))
		goto err;

	ret = test(&ring, fname, 1, 0, 0, 0, 0);
	if (ret) {
		fprintf(stderr, "Buffered novec test failed\n");
		goto err;
	}
	ret = test(&ring, fname, 1, 0, 0, 1, 0);
	if (ret) {
		fprintf(stderr, "Buffered novec reg test failed\n");
		goto err;
	}
	ret = test(&ring, fname, 1, 0, 0, 0, 1);
	if (ret) {
		fprintf(stderr, "Buffered novec provide test failed\n");
		goto err;
	}
	ret = test(&ring, fname, 1, 1, 0, 0, 0);
	if (ret) {
		fprintf(stderr, "Buffered vec test failed\n");
		goto err;
	}
	ret = test(&ring, fname, 1, 1, 1, 0, 0);
	if (ret) {
		fprintf(stderr, "Buffered small vec test failed\n");
		goto err;
	}

	ret = test(&ring, fname, 0, 0, 0, 0, 0);
	if (ret) {
		fprintf(stderr, "O_DIRECT novec test failed\n");
		goto err;
	}
	ret = test(&ring, fname, 0, 0, 0, 1, 0);
	if (ret) {
		fprintf(stderr, "O_DIRECT novec reg test failed\n");
		goto err;
	}
	ret = test(&ring, fname, 0, 0, 0, 0, 1);
	if (ret) {
		fprintf(stderr, "O_DIRECT novec provide test failed\n");
		goto err;
	}
	ret = test(&ring, fname, 0, 1, 0, 0, 0);
	if (ret) {
		fprintf(stderr, "O_DIRECT vec test failed\n");
		goto err;
	}
	ret = test(&ring, fname, 0, 1, 1, 0, 0);
	if (ret) {
		fprintf(stderr, "O_DIRECT small vec test failed\n");
		goto err;
	}

	ret = test_truncate(&ring, fname, 1, 0, 0);
	if (ret) {
		fprintf(stderr, "Buffered end truncate read failed\n");
		goto err;
	}
	ret = test_truncate(&ring, fname, 1, 1, 0);
	if (ret) {
		fprintf(stderr, "Buffered end truncate vec read failed\n");
		goto err;
	}
	ret = test_truncate(&ring, fname, 1, 0, 1);
	if (ret) {
		fprintf(stderr, "Buffered end truncate pbuf read failed\n");
		goto err;
	}

	ret = test_truncate(&ring, fname, 0, 0, 0);
	if (ret) {
		fprintf(stderr, "O_DIRECT end truncate read failed\n");
		goto err;
	}
	ret = test_truncate(&ring, fname, 0, 1, 0);
	if (ret) {
		fprintf(stderr, "O_DIRECT end truncate vec read failed\n");
		goto err;
	}
	ret = test_truncate(&ring, fname, 0, 0, 1);
	if (ret) {
		fprintf(stderr, "O_DIRECT end truncate pbuf read failed\n");
		goto err;
	}

	if (buf == fname)
		unlink(fname);
	return 0;
err:
	if (buf == fname)
		unlink(fname);
	return 1;
}