// SPDX-License-Identifier: MIT or GPL-2.0-only

#include <config.h>

#include <poll.h>
#include <sys/epoll.h>
#include "ublksrv_tgt.h"

static bool user_copy;
static bool block_device;

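/*
 * Check whether the backing block device advertises discard support by
 * reading its queue/discard_max_hw_bytes sysfs attribute; a non-zero
 * value is taken to mean discard is supported.
 */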
static bool backing_supports_discard(char *name)
{
	int fd;
	char buf[512];
	int len;

	len = snprintf(buf, sizeof(buf), "/sys/block/%s/queue/discard_max_hw_bytes",
			basename(name));
	if (len <= 0 || len >= (int)sizeof(buf))
		return false;

	fd = open(buf, O_RDONLY);
	if (fd >= 0) {
		char val[128];
		int ret = pread(fd, val, sizeof(val) - 1, 0);
		unsigned long long bytes = 0;

		close(fd);
		if (ret > 0) {
			val[ret] = '\0';
			bytes = strtoull(val, NULL, 10);
		}

		if (bytes > 0)
			return true;
	}
	return false;
}

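/*
 * Common setup shared by the "add" and recovery paths: read the backing
 * file path, the direct_io flag and the ublk_params back from the JSON
 * buffer, open the backing file and fill in the target info. The backing
 * file becomes tgt->fds[1]; fds[0] is used by the library for the ublk
 * char device. With UBLK_F_USER_COPY the target ring depth is doubled,
 * since each request may need an extra SQE for the copy step.
 */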
static int loop_setup_tgt(struct ublksrv_dev *dev, int type, bool recovery,
		const char *jbuf)
{
	struct ublksrv_tgt_info *tgt = &dev->tgt;
	const struct ublksrv_ctrl_dev_info *info =
		ublksrv_ctrl_get_dev_info(ublksrv_get_ctrl_dev(dev));
	int fd, ret;
	long direct_io = 0;
	struct ublk_params p;
	char file[PATH_MAX];

	ublk_assert(jbuf);

	ret = ublksrv_json_read_target_str_info(jbuf, PATH_MAX, "backing_file", file);
	if (ret < 0) {
		ublk_err("%s: backing file can't be retrieved from jbuf %d\n",
				__func__, ret);
		return ret;
	}

	ret = ublksrv_json_read_target_ulong_info(jbuf, "direct_io",
			&direct_io);
	if (ret) {
		ublk_err("%s: read target direct_io failed %d\n",
				__func__, ret);
		return ret;
	}

	ret = ublksrv_json_read_params(&p, jbuf);
	if (ret) {
		ublk_err("%s: read ublk params failed %d\n",
				__func__, ret);
		return ret;
	}

	fd = open(file, O_RDWR);
	if (fd < 0) {
		ublk_err("%s: backing file %s can't be opened\n",
				__func__, file);
		return fd;
	}

	if (direct_io)
		fcntl(fd, F_SETFL, O_DIRECT);

	ublksrv_tgt_set_io_data_size(tgt);
	tgt->dev_size = p.basic.dev_sectors << 9;
	tgt->tgt_ring_depth = info->queue_depth;
	tgt->nr_fds = 1;
	tgt->fds[1] = fd;
	user_copy = info->flags & UBLK_F_USER_COPY;
	if (user_copy)
		tgt->tgt_ring_depth *= 2;

	return 0;
}

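/*
 * Recovery entry point: the device must be quiesced, and the target is
 * rebuilt from the JSON buffer saved by the previous daemon instance.
 */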
static int loop_recovery_tgt(struct ublksrv_dev *dev, int type)
{
	const struct ublksrv_ctrl_dev *cdev = ublksrv_get_ctrl_dev(dev);
	const struct ublksrv_ctrl_dev_info *info =
		ublksrv_ctrl_get_dev_info(cdev);
	const char *jbuf = ublksrv_ctrl_get_recovery_jbuf(cdev);

	ublk_assert(type == UBLKSRV_TGT_TYPE_LOOP);
	ublk_assert(info->state == UBLK_S_DEV_QUIESCED);

	return loop_setup_tgt(dev, type, true, jbuf);
}

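/*
 * "add" entry point: parse -f/--file and --buffered_io, size the device
 * from the backing file (regular file or block device), pick block size
 * limits, enable discard when the backing store supports it, store
 * everything in the JSON buffer, then run the common setup.
 */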
static int loop_init_tgt(struct ublksrv_dev *dev, int type, int argc, char
		*argv[])
{
	int buffered_io = 0;
	const struct ublksrv_ctrl_dev_info *info =
		ublksrv_ctrl_get_dev_info(ublksrv_get_ctrl_dev(dev));
	static const struct option lo_longopts[] = {
		{ "file", required_argument, NULL, 'f' },
		{ "buffered_io", no_argument, &buffered_io, 1 },
		{ NULL }
	};
	unsigned long long bytes;
	struct stat st;
	int fd, opt;
	char *file = NULL;
	int jbuf_size;
	char *jbuf;
	struct ublksrv_tgt_base_json tgt_json = {
		.type = type,
	};
	struct ublk_params p = {
		.types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD,
		.basic = {
			.logical_bs_shift = 9,
			.physical_bs_shift = 12,
			.io_opt_shift = 12,
			.io_min_shift = 9,
			.max_sectors = info->max_io_buf_bytes >> 9,
		},

		.discard = {
			.max_discard_sectors = UINT_MAX >> 9,
			.max_discard_segments = 1,
		},
	};
	bool can_discard = false;

	strcpy(tgt_json.name, "loop");

	if (type != UBLKSRV_TGT_TYPE_LOOP)
		return -1;

	while ((opt = getopt_long(argc, argv, "-:f:",
				  lo_longopts, NULL)) != -1) {
		switch (opt) {
		case 'f':
			file = strdup(optarg);
			break;
		}
	}

	if (!file)
		return -1;

	fd = open(file, O_RDWR);
	if (fd < 0) {
		ublk_err("%s: backing file %s can't be opened\n",
				__func__, file);
		return -2;
	}

	if (fstat(fd, &st) < 0)
		return -2;

	if (S_ISBLK(st.st_mode)) {
		unsigned int bs, pbs;

		if (ioctl(fd, BLKGETSIZE64, &bytes) != 0)
			return -1;
		if (ioctl(fd, BLKSSZGET, &bs) != 0)
			return -1;
		if (ioctl(fd, BLKPBSZGET, &pbs) != 0)
			return -1;
		block_device = true;
		p.basic.logical_bs_shift = ilog2(bs);
		p.basic.physical_bs_shift = ilog2(pbs);
		can_discard = backing_supports_discard(file);
	} else if (S_ISREG(st.st_mode)) {
		block_device = false;
		bytes = st.st_size;
		can_discard = true;
		p.basic.logical_bs_shift = ilog2(st.st_blksize);
		p.basic.physical_bs_shift = ilog2(st.st_blksize);
	} else {
		bytes = 0;
	}

	/*
	 * In case of buffered I/O, use a common bs/pbs so that any FS
	 * image can be supported.
	 */
	if (buffered_io || !ublk_param_is_valid(&p) ||
			fcntl(fd, F_SETFL, O_DIRECT)) {
		p.basic.logical_bs_shift = 9;
		p.basic.physical_bs_shift = 12;
		buffered_io = 1;
	}

	tgt_json.dev_size = bytes;
	p.basic.dev_sectors = bytes >> 9;

	if (st.st_blksize && can_discard)
		p.discard.discard_granularity = st.st_blksize;
	else
		p.types &= ~UBLK_PARAM_TYPE_DISCARD;

	jbuf = ublksrv_tgt_realloc_json_buf(dev, &jbuf_size);
	ublk_json_write_dev_info(dev, &jbuf, &jbuf_size);
	ublk_json_write_target_base(dev, &jbuf, &jbuf_size, &tgt_json);
	ublk_json_write_tgt_str(dev, &jbuf, &jbuf_size, "backing_file", file);
	ublk_json_write_tgt_long(dev, &jbuf, &jbuf_size, "direct_io", !buffered_io);
	ublk_json_write_params(dev, &jbuf, &jbuf_size, &p);

	close(fd);

	return loop_setup_tgt(dev, type, false, jbuf);
}

static void loop_usage_for_add(void)
{
	printf(" loop: -f backing_file [--buffered_io]\n");
	printf(" default is direct IO to backing file\n");
}

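/*
 * Map DISCARD/WRITE_ZEROES requests to an fallocate() mode, following
 * the kernel loop driver: discard punches a hole, write zeroes punches
 * a hole unless NOUNMAP is set, in which case the range is zeroed in
 * place.
 */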
static inline int loop_fallocate_mode(const struct ublksrv_io_desc *iod)
{
	__u16 ublk_op = ublksrv_get_op(iod);
	__u32 flags = ublksrv_get_flags(iod);
	int mode = FALLOC_FL_KEEP_SIZE;

	/* follow logic of linux kernel loop */
	if (ublk_op == UBLK_IO_OP_DISCARD) {
		mode |= FALLOC_FL_PUNCH_HOLE;
	} else if (ublk_op == UBLK_IO_OP_WRITE_ZEROES) {
		if (flags & UBLK_IO_F_NOUNMAP)
			mode |= FALLOC_FL_ZERO_RANGE;
		else
			mode |= FALLOC_FL_PUNCH_HOLE;
	} else {
		mode |= FALLOC_FL_ZERO_RANGE;
	}

	return mode;
}

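/*
 * Queue a READ. Without user copy, data is read from the backing file
 * straight into the buffer provided by the driver. With UBLK_F_USER_COPY
 * the read goes into a per-tag buffer and a linked write pushes the data
 * to the ublk char device (fds[0]) at the offset returned by ublk_pos().
 */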
static void loop_queue_tgt_read(const struct ublksrv_queue *q,
		const struct ublksrv_io_desc *iod, int tag)
{
	unsigned ublk_op = ublksrv_get_op(iod);

	if (user_copy) {
		struct io_uring_sqe *sqe, *sqe2;
		__u64 pos = ublk_pos(q->q_id, tag, 0);
		void *buf = ublksrv_queue_get_io_buf(q, tag);

		ublk_get_sqe_pair(q->ring_ptr, &sqe, &sqe2);
		io_uring_prep_read(sqe, 1 /*fds[1]*/,
				buf,
				iod->nr_sectors << 9,
				iod->start_sector << 9);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE | IOSQE_IO_LINK);
		sqe->user_data = build_user_data(tag, ublk_op, 1, 1);

		io_uring_prep_write(sqe2, 0 /*fds[0]*/,
				buf, iod->nr_sectors << 9, pos);
		io_uring_sqe_set_flags(sqe2, IOSQE_FIXED_FILE);
		/* bit63 marks us as tgt io */
		sqe2->user_data = build_user_data(tag, ublk_op, 0, 1);
	} else {
		struct io_uring_sqe *sqe;
		void *buf = (void *)iod->addr;

		ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
		io_uring_prep_read(sqe, 1 /*fds[1]*/,
				buf,
				iod->nr_sectors << 9,
				iod->start_sector << 9);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
		sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
	}
}

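/*
 * Queue a WRITE. Without user copy, data is written from the
 * driver-provided buffer to the backing file. With UBLK_F_USER_COPY the
 * data is first read from the ublk char device (fds[0]) into a per-tag
 * buffer, then a linked write pushes it to the backing file.
 */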
static void loop_queue_tgt_write(const struct ublksrv_queue *q,
		const struct ublksrv_io_desc *iod, int tag)
{
	unsigned ublk_op = ublksrv_get_op(iod);

	if (user_copy) {
		struct io_uring_sqe *sqe, *sqe2;
		__u64 pos = ublk_pos(q->q_id, tag, 0);
		void *buf = ublksrv_queue_get_io_buf(q, tag);

		ublk_get_sqe_pair(q->ring_ptr, &sqe, &sqe2);
		io_uring_prep_read(sqe, 0 /*fds[0]*/,
				buf, iod->nr_sectors << 9, pos);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE | IOSQE_IO_LINK);
		sqe->user_data = build_user_data(tag, ublk_op, 1, 1);

		io_uring_prep_write(sqe2, 1 /*fds[1]*/,
				buf, iod->nr_sectors << 9,
				iod->start_sector << 9);
		io_uring_sqe_set_flags(sqe2, IOSQE_FIXED_FILE);
		/* bit63 marks us as tgt io */
		sqe2->user_data = build_user_data(tag, ublk_op, 0, 1);
	} else {
		struct io_uring_sqe *sqe;
		void *buf = (void *)iod->addr;

		ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
		io_uring_prep_write(sqe, 1 /*fds[1]*/,
				buf,
				iod->nr_sectors << 9,
				iod->start_sector << 9);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
		/* bit63 marks us as tgt io */
		sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
	}
}

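/*
 * Translate one ublk request into io_uring SQE(s) against the backing
 * file. Returns a positive value once the SQEs have been queued, or
 * -EINVAL for an unsupported operation.
 */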
static int loop_queue_tgt_io(const struct ublksrv_queue *q,
		const struct ublk_io_data *data, int tag)
{
	const struct ublksrv_io_desc *iod = data->iod;
	struct io_uring_sqe *sqe;
	unsigned ublk_op = ublksrv_get_op(iod);

	switch (ublk_op) {
	case UBLK_IO_OP_FLUSH:
		ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
		io_uring_prep_sync_file_range(sqe, 1 /*fds[1]*/,
				iod->nr_sectors << 9,
				iod->start_sector << 9,
				IORING_FSYNC_DATASYNC);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
		/* bit63 marks us as tgt io */
		sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
		break;
	case UBLK_IO_OP_WRITE_ZEROES:
	case UBLK_IO_OP_DISCARD:
		ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
		io_uring_prep_fallocate(sqe, 1 /*fds[1]*/,
				loop_fallocate_mode(iod),
				iod->start_sector << 9,
				iod->nr_sectors << 9);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
		/* bit63 marks us as tgt io */
		sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
		break;
	case UBLK_IO_OP_READ:
		loop_queue_tgt_read(q, iod, tag);
		break;
	case UBLK_IO_OP_WRITE:
		loop_queue_tgt_write(q, iod, tag);
		break;
	default:
		return -EINVAL;
	}

	ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u\n", __func__, tag,
			iod->op_flags, iod->start_sector, iod->nr_sectors << 9);

	return 1;
}

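/*
 * Per-request coroutine: queue the target I/O, suspend until the
 * completion arrives via loop_tgt_io_done(), retry on -EAGAIN, and
 * finally complete the ublk request with the CQE result.
 */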
static co_io_job __loop_handle_io_async(const struct ublksrv_queue *q,
		const struct ublk_io_data *data, int tag)
{
	int ret;
	struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);

	io->queued_tgt_io = 0;
again:
	ret = loop_queue_tgt_io(q, data, tag);
	if (ret > 0) {
		if (io->queued_tgt_io)
			ublk_err("bad queued_tgt_io %d\n", io->queued_tgt_io);
		io->queued_tgt_io += 1;

		co_await__suspend_always(tag);
		io->queued_tgt_io -= 1;

		if (io->tgt_io_cqe->res == -EAGAIN)
			goto again;

		ublksrv_complete_io(q, tag, io->tgt_io_cqe->res);
	} else if (ret < 0) {
		ublk_err("fail to queue io %d, ret %d\n", tag, ret);
	} else {
		ublk_err("no sqe %d\n", tag);
	}
}

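/*
 * I/O dispatch callback. Discard on a raw block device is handled
 * synchronously via the BLKDISCARD ioctl (after submitting any pending
 * SQEs); everything else is handled by the coroutine above.
 */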
static int loop_handle_io_async(const struct ublksrv_queue *q,
		const struct ublk_io_data *data)
{
	struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);

	if (block_device && ublksrv_get_op(data->iod) == UBLK_IO_OP_DISCARD) {
		__u64 r[2];
		int res;

		io_uring_submit(q->ring_ptr);

		r[0] = data->iod->start_sector << 9;
		r[1] = data->iod->nr_sectors << 9;
		res = ioctl(q->dev->tgt.fds[1], BLKDISCARD, &r);
		ublksrv_complete_io(q, data->tag, res);
	} else {
		io->co = __loop_handle_io_async(q, data, data->tag);
	}
	return 0;
}

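/*
 * Target CQE callback: the first CQE of a user-copy SQE pair (marked via
 * the tgt_data field of user_data) is ignored; otherwise stash the CQE
 * and resume the waiting coroutine.
 */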
static void loop_tgt_io_done(const struct ublksrv_queue *q,
		const struct ublk_io_data *data,
		const struct io_uring_cqe *cqe)
{
	int tag = user_data_to_tag(cqe->user_data);
	struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);

	if (user_data_to_tgt_data(cqe->user_data))
		return;

	ublk_assert(tag == data->tag);
	if (!io->queued_tgt_io)
		ublk_err("%s: wrong queued_tgt_io: res %d qid %u tag %u, cmd_op %u\n",
			__func__, cqe->res, q->q_id,
			user_data_to_tag(cqe->user_data),
			user_data_to_op(cqe->user_data));
	io->tgt_io_cqe = cqe;
	io->co.resume();
}

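/* Flush and close the backing file on target teardown. */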
static void loop_deinit_tgt(const struct ublksrv_dev *dev)
{
	fsync(dev->tgt.fds[1]);
	close(dev->tgt.fds[1]);
}

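/* Callback table registered for the "loop" target type. */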
struct ublksrv_tgt_type loop_tgt_type = {
	.handle_io_async = loop_handle_io_async,
	.tgt_io_done = loop_tgt_io_done,
	.usage_for_add = loop_usage_for_add,
	.init_tgt = loop_init_tgt,
	.deinit_tgt = loop_deinit_tgt,
	.type = UBLKSRV_TGT_TYPE_LOOP,
	.name = "loop",
	.recovery_tgt = loop_recovery_tgt,
};

static void tgt_loop_init() __attribute__((constructor));

static void tgt_loop_init(void)
{
	ublksrv_register_tgt_type(&loop_tgt_type);
}