xref: /aosp_15_r20/external/ublksrv/tgt_loop.cpp (revision 94c4a1e103eb1715230460aab379dff275992c20)
// SPDX-License-Identifier: MIT or GPL-2.0-only

#include <config.h>

#include <poll.h>
#include <sys/epoll.h>
#include "ublksrv_tgt.h"

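/*
 * user_copy: set when the device was created with UBLK_F_USER_COPY; request
 * data then moves via reads/writes on the ublk char device (fds[0]) instead
 * of a pre-mapped buffer.
 * block_device: set when the backing file is a block device, which enables
 * the BLKDISCARD ioctl path for discard requests.
 */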
static bool user_copy;
static bool block_device;

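/*
 * A backing block device supports discard iff its
 * /sys/block/<dev>/queue/discard_max_hw_bytes attribute is non-zero.
 */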
static bool backing_supports_discard(char *name)
{
	int fd;
	char buf[512];

	/* snprintf always NUL-terminates, even on truncation */
	snprintf(buf, 512, "/sys/block/%s/queue/discard_max_hw_bytes",
			basename(name));
	fd = open(buf, O_RDONLY);
	if (fd >= 0) {
		char val[128];
		int ret = pread(fd, val, sizeof(val) - 1, 0);
		unsigned long long bytes = 0;

		close(fd);
		if (ret > 0) {
			val[ret] = 0;
			bytes = strtoull(val, NULL, 10);
		}

		if (bytes > 0)
			return true;
	}
	return false;
}

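/*
 * Apply target configuration from the JSON buffer: open the backing file,
 * honor the saved direct_io setting, and size the target from the saved
 * ublk params. Shared by fresh setup and recovery.
 */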
static int loop_setup_tgt(struct ublksrv_dev *dev, int type, bool recovery,
		const char *jbuf)
{
	struct ublksrv_tgt_info *tgt = &dev->tgt;
	const struct ublksrv_ctrl_dev_info *info =
		ublksrv_ctrl_get_dev_info(ublksrv_get_ctrl_dev(dev));
	int fd, ret;
	long direct_io = 0;
	struct ublk_params p;
	char file[PATH_MAX];

	ublk_assert(jbuf);

	ret = ublksrv_json_read_target_str_info(jbuf, PATH_MAX, "backing_file", file);
	if (ret < 0) {
		ublk_err("%s: backing file can't be retrieved from jbuf %d\n",
				__func__, ret);
		return ret;
	}

	ret = ublksrv_json_read_target_ulong_info(jbuf, "direct_io",
			&direct_io);
	if (ret) {
		ublk_err("%s: read target direct_io failed %d\n",
				__func__, ret);
		return ret;
	}

	ret = ublksrv_json_read_params(&p, jbuf);
	if (ret) {
		ublk_err("%s: read ublk params failed %d\n",
				__func__, ret);
		return ret;
	}

	fd = open(file, O_RDWR);
	if (fd < 0) {
		ublk_err("%s: backing file %s can't be opened\n",
				__func__, file);
		return fd;
	}

	if (direct_io)
		fcntl(fd, F_SETFL, O_DIRECT);

	ublksrv_tgt_set_io_data_size(tgt);
	tgt->dev_size = p.basic.dev_sectors << 9;
	tgt->tgt_ring_depth = info->queue_depth;
	tgt->nr_fds = 1;
	/* fds[0] is the ublk char device; the backing file goes in fds[1] */
	tgt->fds[1] = fd;
	user_copy = info->flags & UBLK_F_USER_COPY;
	/* user-copy queues two linked SQEs per request, so double the depth */
	if (user_copy)
		tgt->tgt_ring_depth *= 2;

	return 0;
}

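/*
 * Recovery path: the device is quiesced and the control device still holds
 * the JSON buffer written at init time, so the target is rebuilt from it.
 */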
static int loop_recovery_tgt(struct ublksrv_dev *dev, int type)
{
	const struct ublksrv_ctrl_dev *cdev = ublksrv_get_ctrl_dev(dev);
	const struct ublksrv_ctrl_dev_info *info =
		ublksrv_ctrl_get_dev_info(cdev);
	const char *jbuf = ublksrv_ctrl_get_recovery_jbuf(cdev);

	ublk_assert(type == UBLKSRV_TGT_TYPE_LOOP);
	ublk_assert(info->state == UBLK_S_DEV_QUIESCED);

	return loop_setup_tgt(dev, type, true, jbuf);
}

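/*
 * Parse "-f <file> [--buffered_io]", probe the backing file (block device
 * vs regular file) to pick block sizes and discard support, persist the
 * configuration as JSON, then hand off to loop_setup_tgt().
 */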
static int loop_init_tgt(struct ublksrv_dev *dev, int type, int argc, char
		*argv[])
{
	int buffered_io = 0;
	const struct ublksrv_ctrl_dev_info *info =
		ublksrv_ctrl_get_dev_info(ublksrv_get_ctrl_dev(dev));
	static const struct option lo_longopts[] = {
		{ "file",		1,	NULL, 'f' },
		{ "buffered_io",	no_argument, &buffered_io, 1},
		{ NULL }
	};
	unsigned long long bytes;
	struct stat st;
	int fd, opt;
	char *file = NULL;
	int jbuf_size;
	char *jbuf;
	struct ublksrv_tgt_base_json tgt_json = {
		.type = type,
	};
	struct ublk_params p = {
		.types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD,
		.basic = {
			.logical_bs_shift	= 9,
			.physical_bs_shift	= 12,
			.io_opt_shift	= 12,
			.io_min_shift	= 9,
			.max_sectors		= info->max_io_buf_bytes >> 9,
		},

		.discard = {
			.max_discard_sectors	= UINT_MAX >> 9,
			.max_discard_segments	= 1,
		},
	};
	bool can_discard = false;

	strcpy(tgt_json.name, "loop");

	if (type != UBLKSRV_TGT_TYPE_LOOP)
		return -1;

	while ((opt = getopt_long(argc, argv, "-:f:",
				  lo_longopts, NULL)) != -1) {
		switch (opt) {
		case 'f':
			file = strdup(optarg);
			break;
		}
	}

	if (!file)
		return -1;

	fd = open(file, O_RDWR);
	if (fd < 0) {
		ublk_err("%s: backing file %s can't be opened\n",
				__func__, file);
		return -2;
	}

	if (fstat(fd, &st) < 0) {
		close(fd);
		return -2;
	}

	if (S_ISBLK(st.st_mode)) {
		unsigned int bs, pbs;

		if (ioctl(fd, BLKGETSIZE64, &bytes) != 0 ||
				ioctl(fd, BLKSSZGET, &bs) != 0 ||
				ioctl(fd, BLKPBSZGET, &pbs) != 0) {
			close(fd);
			return -1;
		}
		block_device = true;
		p.basic.logical_bs_shift = ilog2(bs);
		p.basic.physical_bs_shift = ilog2(pbs);
		can_discard = backing_supports_discard(file);
	} else if (S_ISREG(st.st_mode)) {
		block_device = false;
		bytes = st.st_size;
		can_discard = true;
		p.basic.logical_bs_shift = ilog2(st.st_blksize);
		p.basic.physical_bs_shift = ilog2(st.st_blksize);
	} else {
		bytes = 0;
	}

	/*
	 * in case of buffered io, use common bs/pbs so that all FS
	 * images can be supported
	 */
	if (buffered_io || !ublk_param_is_valid(&p) ||
			fcntl(fd, F_SETFL, O_DIRECT)) {
		p.basic.logical_bs_shift = 9;
		p.basic.physical_bs_shift = 12;
		buffered_io = 1;
	}

	tgt_json.dev_size = bytes;
	p.basic.dev_sectors = bytes >> 9;

	if (st.st_blksize && can_discard)
		p.discard.discard_granularity = st.st_blksize;
	else
		p.types &= ~UBLK_PARAM_TYPE_DISCARD;

	jbuf = ublksrv_tgt_realloc_json_buf(dev, &jbuf_size);
	ublk_json_write_dev_info(dev, &jbuf, &jbuf_size);
	ublk_json_write_target_base(dev, &jbuf, &jbuf_size, &tgt_json);
	ublk_json_write_tgt_str(dev, &jbuf, &jbuf_size, "backing_file", file);
	ublk_json_write_tgt_long(dev, &jbuf, &jbuf_size, "direct_io", !buffered_io);
	ublk_json_write_params(dev, &jbuf, &jbuf_size, &p);

	close(fd);

	return loop_setup_tgt(dev, type, false, jbuf);
}

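/*
 * Example (assuming the ublksrv demo CLI is named "ublk"):
 *   ublk add -t loop -f ./test.img [--buffered_io]
 */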
static void loop_usage_for_add(void)
{
	printf("           loop: -f backing_file [--buffered_io]\n");
	printf("           	default is direct IO to backing file\n");
}

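/*
 * Map a ublk discard/write-zeroes request to a fallocate() mode, following
 * the same rules as the kernel loop driver.
 */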
static inline int loop_fallocate_mode(const struct ublksrv_io_desc *iod)
{
	__u16 ublk_op = ublksrv_get_op(iod);
	__u32 flags = ublksrv_get_flags(iod);
	int mode = FALLOC_FL_KEEP_SIZE;

	/* follow logic of linux kernel loop */
	if (ublk_op == UBLK_IO_OP_DISCARD) {
		mode |= FALLOC_FL_PUNCH_HOLE;
	} else if (ublk_op == UBLK_IO_OP_WRITE_ZEROES) {
		if (flags & UBLK_IO_F_NOUNMAP)
			mode |= FALLOC_FL_ZERO_RANGE;
		else
			mode |= FALLOC_FL_PUNCH_HOLE;
	} else {
		mode |= FALLOC_FL_ZERO_RANGE;
	}

	return mode;
}

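/*
 * READ: with user-copy, read from the backing file into a per-tag buffer,
 * then write that buffer to the ublk char device at ublk_pos() so the
 * kernel can copy it into the request; the two SQEs are linked so the copy
 * only runs after the backing read completes. Without user-copy, read
 * straight into the kernel-provided buffer at iod->addr.
 */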
static void loop_queue_tgt_read(const struct ublksrv_queue *q,
		const struct ublksrv_io_desc *iod, int tag)
{
	unsigned ublk_op = ublksrv_get_op(iod);

	if (user_copy) {
		struct io_uring_sqe *sqe, *sqe2;
		__u64 pos = ublk_pos(q->q_id, tag, 0);
		void *buf = ublksrv_queue_get_io_buf(q, tag);

		ublk_get_sqe_pair(q->ring_ptr, &sqe, &sqe2);
		io_uring_prep_read(sqe, 1 /*fds[1]*/,
				buf,
				iod->nr_sectors << 9,
				iod->start_sector << 9);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE | IOSQE_IO_LINK);
		sqe->user_data = build_user_data(tag, ublk_op, 1, 1);

		io_uring_prep_write(sqe2, 0 /*fds[0]*/,
				buf, iod->nr_sectors << 9, pos);
		io_uring_sqe_set_flags(sqe2, IOSQE_FIXED_FILE);
		/* bit63 marks us as tgt io */
		sqe2->user_data = build_user_data(tag, ublk_op, 0, 1);
	} else {
		struct io_uring_sqe *sqe;
		void *buf = (void *)iod->addr;

		ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
		io_uring_prep_read(sqe, 1 /*fds[1]*/,
			buf,
			iod->nr_sectors << 9,
			iod->start_sector << 9);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
		sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
	}
}

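/*
 * WRITE: with user-copy, first read the request payload from the ublk char
 * device at ublk_pos() into a per-tag buffer, then write it to the backing
 * file; the linked pair enforces the ordering. Without user-copy, write
 * directly from the kernel-provided buffer at iod->addr.
 */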
static void loop_queue_tgt_write(const struct ublksrv_queue *q,
		const struct ublksrv_io_desc *iod, int tag)
{
	unsigned ublk_op = ublksrv_get_op(iod);

	if (user_copy) {
		struct io_uring_sqe *sqe, *sqe2;
		__u64 pos = ublk_pos(q->q_id, tag, 0);
		void *buf = ublksrv_queue_get_io_buf(q, tag);

		ublk_get_sqe_pair(q->ring_ptr, &sqe, &sqe2);
		io_uring_prep_read(sqe, 0 /*fds[0]*/,
			buf, iod->nr_sectors << 9, pos);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE | IOSQE_IO_LINK);
		sqe->user_data = build_user_data(tag, ublk_op, 1, 1);

		io_uring_prep_write(sqe2, 1 /*fds[1]*/,
			buf, iod->nr_sectors << 9,
			iod->start_sector << 9);
		io_uring_sqe_set_flags(sqe2, IOSQE_FIXED_FILE);
		/* bit63 marks us as tgt io */
		sqe2->user_data = build_user_data(tag, ublk_op, 0, 1);
	} else {
		struct io_uring_sqe *sqe;
		void *buf = (void *)iod->addr;

		ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
		io_uring_prep_write(sqe, 1 /*fds[1]*/,
			buf,
			iod->nr_sectors << 9,
			iod->start_sector << 9);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
		/* bit63 marks us as tgt io */
		sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
	}
}

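/*
 * Dispatch one request: FLUSH maps to sync_file_range, DISCARD and
 * WRITE_ZEROES map to fallocate, READ/WRITE go through the helpers above.
 * Returns 1 once the SQE(s) are queued, -EINVAL for unsupported ops.
 */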
static int loop_queue_tgt_io(const struct ublksrv_queue *q,
		const struct ublk_io_data *data, int tag)
{
	const struct ublksrv_io_desc *iod = data->iod;
	struct io_uring_sqe *sqe;
	unsigned ublk_op = ublksrv_get_op(iod);

	switch (ublk_op) {
	case UBLK_IO_OP_FLUSH:
		ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
		io_uring_prep_sync_file_range(sqe, 1 /*fds[1]*/,
				iod->nr_sectors << 9,
				iod->start_sector << 9,
				IORING_FSYNC_DATASYNC);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
		/* bit63 marks us as tgt io */
		sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
		break;
	case UBLK_IO_OP_WRITE_ZEROES:
	case UBLK_IO_OP_DISCARD:
		ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
		io_uring_prep_fallocate(sqe, 1 /*fds[1]*/,
				loop_fallocate_mode(iod),
				iod->start_sector << 9,
				iod->nr_sectors << 9);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
		/* bit63 marks us as tgt io */
		sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
		break;
	case UBLK_IO_OP_READ:
		loop_queue_tgt_read(q, iod, tag);
		break;
	case UBLK_IO_OP_WRITE:
		loop_queue_tgt_write(q, iod, tag);
		break;
	default:
		return -EINVAL;
	}

	ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u\n", __func__, tag,
			iod->op_flags, iod->start_sector, iod->nr_sectors << 9);

	return 1;
}

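/*
 * Per-request coroutine: queue the target I/O, suspend until
 * loop_tgt_io_done() resumes us with the CQE, requeue on -EAGAIN, and
 * finally complete the ublk request with the CQE result.
 */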
static co_io_job __loop_handle_io_async(const struct ublksrv_queue *q,
		const struct ublk_io_data *data, int tag)
{
	int ret;
	struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);

	io->queued_tgt_io = 0;
 again:
	ret = loop_queue_tgt_io(q, data, tag);
	if (ret > 0) {
		if (io->queued_tgt_io)
			ublk_err("bad queued_tgt_io %d\n", io->queued_tgt_io);
		io->queued_tgt_io += 1;

		co_await__suspend_always(tag);
		io->queued_tgt_io -= 1;

		if (io->tgt_io_cqe->res == -EAGAIN)
			goto again;

		ublksrv_complete_io(q, tag, io->tgt_io_cqe->res);
	} else if (ret < 0) {
		ublk_err("fail to queue io %d, ret %d\n", tag, ret);
	} else {
		ublk_err("no sqe %d\n", tag);
	}
}

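/*
 * Entry point for each request. Discard on a backing block device is
 * handled synchronously with the BLKDISCARD ioctl rather than through
 * fallocate(); everything else runs in the coroutine above.
 */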
static int loop_handle_io_async(const struct ublksrv_queue *q,
		const struct ublk_io_data *data)
{
	struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);

	if (block_device && ublksrv_get_op(data->iod) == UBLK_IO_OP_DISCARD) {
		__u64 r[2];
		int res;

		/* flush pending SQEs before blocking in the ioctl below */
		io_uring_submit(q->ring_ptr);

		r[0] = data->iod->start_sector << 9;
		r[1] = data->iod->nr_sectors << 9;
		res = ioctl(q->dev->tgt.fds[1], BLKDISCARD, &r);
		ublksrv_complete_io(q, data->tag, res);
	} else {
		io->co = __loop_handle_io_async(q, data, data->tag);
	}
	return 0;
}

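/*
 * Target CQE handler: completions of the first SQE in a user-copy pair
 * (tgt_data != 0) are ignored since the linked second SQE carries the
 * final result; otherwise stash the CQE and resume the coroutine.
 */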
static void loop_tgt_io_done(const struct ublksrv_queue *q,
		const struct ublk_io_data *data,
		const struct io_uring_cqe *cqe)
{
	int tag = user_data_to_tag(cqe->user_data);
	struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);

	if (user_data_to_tgt_data(cqe->user_data))
		return;

	ublk_assert(tag == data->tag);
	if (!io->queued_tgt_io)
		ublk_err("%s: wrong queued_tgt_io: res %d qid %u tag %u, cmd_op %u\n",
			__func__, cqe->res, q->q_id,
			user_data_to_tag(cqe->user_data),
			user_data_to_op(cqe->user_data));
	io->tgt_io_cqe = cqe;
	io->co.resume();
}

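/* Flush the backing file so buffered writes reach storage, then close it. */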
static void loop_deinit_tgt(const struct ublksrv_dev *dev)
{
	fsync(dev->tgt.fds[1]);
	close(dev->tgt.fds[1]);
}

struct ublksrv_tgt_type loop_tgt_type = {
	.handle_io_async = loop_handle_io_async,
	.tgt_io_done	= loop_tgt_io_done,
	.usage_for_add	= loop_usage_for_add,
	.init_tgt	= loop_init_tgt,
	.deinit_tgt	= loop_deinit_tgt,
	.type		= UBLKSRV_TGT_TYPE_LOOP,
	.name		= "loop",
	.recovery_tgt	= loop_recovery_tgt,
};

static void tgt_loop_init() __attribute__((constructor));

static void tgt_loop_init(void)
{
	ublksrv_register_tgt_type(&loop_tgt_type);
}