// SPDX-License-Identifier: MIT or GPL-2.0-only

#include <config.h>
#include <limits.h>
#include <libgen.h>
#include "ublksrv_tgt.h"

static bool user_copy;
static bool block_device;

static bool backing_supports_discard(char *name)
{
	int fd;
	char buf[512];
	int len;

	len = snprintf(buf, 512, "/sys/block/%s/queue/discard_max_hw_bytes",
			basename(name));
	buf[len] = 0;
	fd = open(buf, O_RDONLY);
	if (fd > 0) {
		char val[128];
		int ret = pread(fd, val, 128, 0);
		unsigned long long bytes = 0;

		close(fd);
		if (ret > 0)
			bytes = strtol(val, NULL, 10);

		if (bytes > 0)
			return true;
	}
	return false;
}
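
/*
 * Build the target from the JSON buffer. This runs on both the plain
 * "add" path (jbuf just written by loop_init_tgt) and the recovery path
 * (jbuf retrieved from the control device), so everything needed to
 * reopen the backing file lives in the JSON.
 */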
static int loop_setup_tgt(struct ublksrv_dev *dev, int type, bool recovery,
		const char *jbuf)
{
	struct ublksrv_tgt_info *tgt = &dev->tgt;
	const struct ublksrv_ctrl_dev_info *info =
		ublksrv_ctrl_get_dev_info(ublksrv_get_ctrl_dev(dev));
	int fd, ret;
	long direct_io = 0;
	struct ublk_params p;
	char file[PATH_MAX];

	ublk_assert(jbuf);

	ret = ublksrv_json_read_target_str_info(jbuf, PATH_MAX,
			"backing_file", file);
	if (ret < 0) {
		ublk_err("%s: backing file can't be retrieved from jbuf %d\n",
				__func__, ret);
		return ret;
	}

	ret = ublksrv_json_read_target_ulong_info(jbuf, "direct_io",
			&direct_io);
	if (ret) {
		ublk_err("%s: read target direct_io failed %d\n",
				__func__, ret);
		return ret;
	}

	ret = ublksrv_json_read_params(&p, jbuf);
	if (ret) {
		ublk_err("%s: read ublk params failed %d\n", __func__, ret);
		return ret;
	}

	fd = open(file, O_RDWR);
	if (fd < 0) {
		ublk_err("%s: backing file %s can't be opened\n",
				__func__, file);
		return fd;
	}

	if (direct_io)
		fcntl(fd, F_SETFL, O_DIRECT);

	ublksrv_tgt_set_io_data_size(tgt);
	tgt->dev_size = p.basic.dev_sectors << 9;
	tgt->tgt_ring_depth = info->queue_depth;
	tgt->nr_fds = 1;
	/* fds[0] is the ublk char device; the backing file is fds[1] */
	tgt->fds[1] = fd;

	user_copy = info->flags & UBLK_F_USER_COPY;
	/* user-copy issues two SQEs per request, so double the ring depth */
	if (user_copy)
		tgt->tgt_ring_depth *= 2;

	return 0;
}

static int loop_recovery_tgt(struct ublksrv_dev *dev, int type)
{
	const struct ublksrv_ctrl_dev *cdev = ublksrv_get_ctrl_dev(dev);
	const struct ublksrv_ctrl_dev_info *info =
		ublksrv_ctrl_get_dev_info(cdev);
	const char *jbuf = ublksrv_ctrl_get_recovery_jbuf(cdev);

	ublk_assert(type == UBLKSRV_TGT_TYPE_LOOP);
	ublk_assert(info->state == UBLK_S_DEV_QUIESCED);

	return loop_setup_tgt(dev, type, true, jbuf);
}

static int loop_init_tgt(struct ublksrv_dev *dev, int type, int argc,
		char *argv[])
{
	int buffered_io = 0;
	const struct ublksrv_ctrl_dev_info *info =
		ublksrv_ctrl_get_dev_info(ublksrv_get_ctrl_dev(dev));
	static const struct option lo_longopts[] = {
		{ "file",		1,		NULL, 'f' },
		{ "buffered_io",	no_argument,	&buffered_io, 1 },
		{ NULL }
	};
	unsigned long long bytes;
	struct stat st;
	int fd, opt;
	char *file = NULL;
	int jbuf_size;
	char *jbuf;
	struct ublksrv_tgt_base_json tgt_json = {
		.type = type,
	};
	struct ublk_params p = {
		.types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD,
		.basic = {
			.logical_bs_shift	= 9,
			.physical_bs_shift	= 12,
			.io_opt_shift		= 12,
			.io_min_shift		= 9,
			.max_sectors		= info->max_io_buf_bytes >> 9,
		},
		.discard = {
			.max_discard_sectors	= UINT_MAX >> 9,
			.max_discard_segments	= 1,
		},
	};
	bool can_discard = false;

	strcpy(tgt_json.name, "loop");

	if (type != UBLKSRV_TGT_TYPE_LOOP)
		return -1;

	while ((opt = getopt_long(argc, argv, "-:f:",
				  lo_longopts, NULL)) != -1) {
		switch (opt) {
		case 'f':
			file = strdup(optarg);
			break;
		}
	}

	if (!file)
		return -1;

	fd = open(file, O_RDWR);
	if (fd < 0) {
		ublk_err("%s: backing file %s can't be opened\n",
				__func__, file);
		return -2;
	}

	if (fstat(fd, &st) < 0)
		return -2;

	if (S_ISBLK(st.st_mode)) {
		unsigned int bs, pbs;

		if (ioctl(fd, BLKGETSIZE64, &bytes) != 0)
			return -1;
		if (ioctl(fd, BLKSSZGET, &bs) != 0)
			return -1;
		if (ioctl(fd, BLKPBSZGET, &pbs) != 0)
			return -1;
		block_device = true;
		p.basic.logical_bs_shift = ilog2(bs);
		p.basic.physical_bs_shift = ilog2(pbs);
		can_discard = backing_supports_discard(file);
	} else if (S_ISREG(st.st_mode)) {
		block_device = false;
		bytes = st.st_size;
		can_discard = true;
		p.basic.logical_bs_shift = ilog2(st.st_blksize);
		p.basic.physical_bs_shift = ilog2(st.st_blksize);
	} else {
		bytes = 0;
	}

	/*
	 * In case of buffered IO, use common bs/pbs so that all FS
	 * images can be supported.
	 */
	if (buffered_io || !ublk_param_is_valid(&p) ||
			fcntl(fd, F_SETFL, O_DIRECT)) {
		p.basic.logical_bs_shift = 9;
		p.basic.physical_bs_shift = 12;
		buffered_io = 1;
	}

	tgt_json.dev_size = bytes;
	p.basic.dev_sectors = bytes >> 9;

	if (st.st_blksize && can_discard)
		p.discard.discard_granularity = st.st_blksize;
	else
		p.types &= ~UBLK_PARAM_TYPE_DISCARD;

	jbuf = ublksrv_tgt_realloc_json_buf(dev, &jbuf_size);
	ublk_json_write_dev_info(dev, &jbuf, &jbuf_size);
	ublk_json_write_target_base(dev, &jbuf, &jbuf_size, &tgt_json);
	ublk_json_write_tgt_str(dev, &jbuf, &jbuf_size, "backing_file", file);
	ublk_json_write_tgt_long(dev, &jbuf, &jbuf_size, "direct_io",
			!buffered_io);
	ublk_json_write_params(dev, &jbuf, &jbuf_size, &p);

	close(fd);

	return loop_setup_tgt(dev, type, false, jbuf);
}

static void loop_usage_for_add(void)
{
	printf("	loop: -f backing_file [--buffered_io]\n");
	printf("		default is direct IO to backing file\n");
}

static inline int loop_fallocate_mode(const struct ublksrv_io_desc *iod)
{
	__u16 ublk_op = ublksrv_get_op(iod);
	__u32 flags = ublksrv_get_flags(iod);
	int mode = FALLOC_FL_KEEP_SIZE;

	/* follow logic of linux kernel loop */
	if (ublk_op == UBLK_IO_OP_DISCARD) {
		mode |= FALLOC_FL_PUNCH_HOLE;
	} else if (ublk_op == UBLK_IO_OP_WRITE_ZEROES) {
		if (flags & UBLK_IO_F_NOUNMAP)
			mode |= FALLOC_FL_ZERO_RANGE;
		else
			mode |= FALLOC_FL_PUNCH_HOLE;
	} else {
		mode |= FALLOC_FL_ZERO_RANGE;
	}

	return mode;
}
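
/*
 * With UBLK_F_USER_COPY the request pages are not mapped into the
 * daemon; data moves through the ublk char device (fds[0]) at the
 * offset returned by ublk_pos(). Each READ/WRITE therefore becomes a
 * pair of linked SQEs, one against the backing file (fds[1]) and one
 * against the char device, which is why loop_setup_tgt() doubles the
 * target ring depth in that mode.
 */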
static void loop_queue_tgt_read(const struct ublksrv_queue *q,
		const struct ublksrv_io_desc *iod, int tag)
{
	unsigned ublk_op = ublksrv_get_op(iod);

	if (user_copy) {
		struct io_uring_sqe *sqe, *sqe2;
		__u64 pos = ublk_pos(q->q_id, tag, 0);
		void *buf = ublksrv_queue_get_io_buf(q, tag);

		ublk_get_sqe_pair(q->ring_ptr, &sqe, &sqe2);
		io_uring_prep_read(sqe, 1 /*fds[1]*/, buf,
				iod->nr_sectors << 9,
				iod->start_sector << 9);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE | IOSQE_IO_LINK);
		sqe->user_data = build_user_data(tag, ublk_op, 1, 1);

		io_uring_prep_write(sqe2, 0 /*fds[0]*/, buf,
				iod->nr_sectors << 9, pos);
		io_uring_sqe_set_flags(sqe2, IOSQE_FIXED_FILE);
		/* bit63 marks us as tgt io */
		sqe2->user_data = build_user_data(tag, ublk_op, 0, 1);
	} else {
		struct io_uring_sqe *sqe;
		void *buf = (void *)iod->addr;

		ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
		io_uring_prep_read(sqe, 1 /*fds[1]*/, buf,
				iod->nr_sectors << 9,
				iod->start_sector << 9);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
		sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
	}
}

static void loop_queue_tgt_write(const struct ublksrv_queue *q,
		const struct ublksrv_io_desc *iod, int tag)
{
	unsigned ublk_op = ublksrv_get_op(iod);

	if (user_copy) {
		struct io_uring_sqe *sqe, *sqe2;
		__u64 pos = ublk_pos(q->q_id, tag, 0);
		void *buf = ublksrv_queue_get_io_buf(q, tag);

		ublk_get_sqe_pair(q->ring_ptr, &sqe, &sqe2);
		io_uring_prep_read(sqe, 0 /*fds[0]*/, buf,
				iod->nr_sectors << 9, pos);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE | IOSQE_IO_LINK);
		sqe->user_data = build_user_data(tag, ublk_op, 1, 1);

		io_uring_prep_write(sqe2, 1 /*fds[1]*/, buf,
				iod->nr_sectors << 9,
				iod->start_sector << 9);
		io_uring_sqe_set_flags(sqe2, IOSQE_FIXED_FILE);
		/* bit63 marks us as tgt io */
		sqe2->user_data = build_user_data(tag, ublk_op, 0, 1);
	} else {
		struct io_uring_sqe *sqe;
		void *buf = (void *)iod->addr;

		ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
		io_uring_prep_write(sqe, 1 /*fds[1]*/, buf,
				iod->nr_sectors << 9,
				iod->start_sector << 9);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
		/* bit63 marks us as tgt io */
		sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
	}
}

static int loop_queue_tgt_io(const struct ublksrv_queue *q,
		const struct ublk_io_data *data, int tag)
{
	const struct ublksrv_io_desc *iod = data->iod;
	struct io_uring_sqe *sqe;
	unsigned ublk_op = ublksrv_get_op(iod);

	switch (ublk_op) {
	case UBLK_IO_OP_FLUSH:
		ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
		io_uring_prep_sync_file_range(sqe, 1 /*fds[1]*/,
				iod->nr_sectors << 9,
				iod->start_sector << 9,
				IORING_FSYNC_DATASYNC);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
		/* bit63 marks us as tgt io */
		sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
		break;
	case UBLK_IO_OP_WRITE_ZEROES:
	case UBLK_IO_OP_DISCARD:
		ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
		io_uring_prep_fallocate(sqe, 1 /*fds[1]*/,
				loop_fallocate_mode(iod),
				iod->start_sector << 9,
				iod->nr_sectors << 9);
		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
		/* bit63 marks us as tgt io */
		sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
		break;
	case UBLK_IO_OP_READ:
		loop_queue_tgt_read(q, iod, tag);
		break;
	case UBLK_IO_OP_WRITE:
		loop_queue_tgt_write(q, iod, tag);
		break;
	default:
		return -EINVAL;
	}

	ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u\n", __func__,
			tag, iod->op_flags, iod->start_sector,
			iod->nr_sectors << 9);

	return 1;
}

static co_io_job __loop_handle_io_async(const struct ublksrv_queue *q,
		const struct ublk_io_data *data, int tag)
{
	int ret;
	struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);

	io->queued_tgt_io = 0;
again:
	ret = loop_queue_tgt_io(q, data, tag);
	if (ret > 0) {
		if (io->queued_tgt_io)
			ublk_err("bad queued_tgt_io %d\n", io->queued_tgt_io);
		io->queued_tgt_io += 1;

		co_await__suspend_always(tag);
		io->queued_tgt_io -= 1;

		if (io->tgt_io_cqe->res == -EAGAIN)
			goto again;

		ublksrv_complete_io(q, tag, io->tgt_io_cqe->res);
	} else if (ret < 0) {
		ublk_err("fail to queue io %d, ret %d\n", tag, ret);
	} else {
		ublk_err("no sqe %d\n", tag);
	}
}
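
/*
 * For block-device backings, DISCARD is handled synchronously with the
 * BLKDISCARD ioctl instead of fallocate(); queued SQEs are flushed with
 * io_uring_submit() first since the ioctl blocks the queue thread.
 */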
static int loop_handle_io_async(const struct ublksrv_queue *q,
		const struct ublk_io_data *data)
{
	struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);

	if (block_device &&
			ublksrv_get_op(data->iod) == UBLK_IO_OP_DISCARD) {
		__u64 r[2];
		int res;

		io_uring_submit(q->ring_ptr);
		r[0] = data->iod->start_sector << 9;
		r[1] = data->iod->nr_sectors << 9;
		res = ioctl(q->dev->tgt.fds[1], BLKDISCARD, &r);
		ublksrv_complete_io(q, data->tag, res);
	} else {
		io->co = __loop_handle_io_async(q, data, data->tag);
	}

	return 0;
}

static void loop_tgt_io_done(const struct ublksrv_queue *q,
		const struct ublk_io_data *data,
		const struct io_uring_cqe *cqe)
{
	int tag = user_data_to_tag(cqe->user_data);
	struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);

	/* ignore the linked first SQE; resume only on the final cqe */
	if (user_data_to_tgt_data(cqe->user_data))
		return;

	ublk_assert(tag == data->tag);
	if (!io->queued_tgt_io)
		ublk_err("%s: wrong queued_tgt_io: res %d qid %u tag %u, cmd_op %u\n",
				__func__, cqe->res, q->q_id,
				user_data_to_tag(cqe->user_data),
				user_data_to_op(cqe->user_data));
	io->tgt_io_cqe = cqe;
	io->co.resume();
}

static void loop_deinit_tgt(const struct ublksrv_dev *dev)
{
	fsync(dev->tgt.fds[1]);
	close(dev->tgt.fds[1]);
}

struct ublksrv_tgt_type loop_tgt_type = {
	.handle_io_async = loop_handle_io_async,
	.tgt_io_done	= loop_tgt_io_done,
	.usage_for_add	= loop_usage_for_add,
	.init_tgt	= loop_init_tgt,
	.deinit_tgt	= loop_deinit_tgt,
	.type		= UBLKSRV_TGT_TYPE_LOOP,
	.name		= "loop",
	.recovery_tgt	= loop_recovery_tgt,
};

static void tgt_loop_init(void) __attribute__((constructor));

static void tgt_loop_init(void)
{
	ublksrv_register_tgt_type(&loop_tgt_type);
}
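
/*
 * Example invocation (assumed ublksrv CLI syntax; the option set matches
 * loop_usage_for_add above):
 *
 *   ublk add -t loop -f /path/to/backing.img
 *   ublk add -t loop -f /dev/nvme0n1 --buffered_io
 */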