xref: /aosp_15_r20/external/ublksrv/tgt_loop.cpp (revision 94c4a1e103eb1715230460aab379dff275992c20)
1*94c4a1e1SFrank Piva // SPDX-License-Identifier: MIT or GPL-2.0-only
2*94c4a1e1SFrank Piva 
3*94c4a1e1SFrank Piva #include <config.h>
4*94c4a1e1SFrank Piva 
5*94c4a1e1SFrank Piva #include <poll.h>
6*94c4a1e1SFrank Piva #include <sys/epoll.h>
7*94c4a1e1SFrank Piva #include "ublksrv_tgt.h"
8*94c4a1e1SFrank Piva 
/* Set by loop_setup_tgt() from UBLK_F_USER_COPY; selects the two-SQE copy path. */
static bool user_copy = false;
/* Set by loop_init_tgt() when the backing file is a block device. */
static bool block_device = false;
11*94c4a1e1SFrank Piva 
/*
 * Report whether the block device behind @name advertises discard support.
 *
 * Reads /sys/block/<basename(name)>/queue/discard_max_hw_bytes and returns
 * true only when the attribute parses to a non-zero value. Every failure
 * (path does not fit, attribute missing or unreadable, empty content) is
 * reported as "no discard support".
 */
static bool backing_supports_discard(char *name)
{
	char path[512];
	char val[128];
	unsigned long long bytes;
	ssize_t nread;
	int len;
	int fd;

	len = snprintf(path, sizeof(path),
			"/sys/block/%s/queue/discard_max_hw_bytes",
			basename(name));
	/*
	 * snprintf() returns the length the full string would have had, so
	 * a value >= sizeof(path) means the path was truncated.
	 */
	if (len < 0 || (size_t)len >= sizeof(path))
		return false;

	fd = open(path, O_RDONLY);
	/* 0 is a valid descriptor; only a negative value signals failure */
	if (fd < 0)
		return false;

	/*
	 * The descriptor was just opened, so read() starts at offset 0.
	 * Keep one byte free for the terminator needed by strtoull().
	 */
	nread = read(fd, val, sizeof(val) - 1);
	close(fd);
	if (nread <= 0)
		return false;

	val[nread] = '\0';
	bytes = strtoull(val, NULL, 10);

	return bytes > 0;
}
36*94c4a1e1SFrank Piva 
loop_setup_tgt(struct ublksrv_dev * dev,int type,bool recovery,const char * jbuf)37*94c4a1e1SFrank Piva static int loop_setup_tgt(struct ublksrv_dev *dev, int type, bool recovery,
38*94c4a1e1SFrank Piva 		const char *jbuf)
39*94c4a1e1SFrank Piva {
40*94c4a1e1SFrank Piva 	struct ublksrv_tgt_info *tgt = &dev->tgt;
41*94c4a1e1SFrank Piva 	const struct ublksrv_ctrl_dev_info *info =
42*94c4a1e1SFrank Piva 		ublksrv_ctrl_get_dev_info(ublksrv_get_ctrl_dev(dev));
43*94c4a1e1SFrank Piva 	int fd, ret;
44*94c4a1e1SFrank Piva 	long direct_io = 0;
45*94c4a1e1SFrank Piva 	struct ublk_params p;
46*94c4a1e1SFrank Piva 	char file[PATH_MAX];
47*94c4a1e1SFrank Piva 
48*94c4a1e1SFrank Piva 	ublk_assert(jbuf);
49*94c4a1e1SFrank Piva 
50*94c4a1e1SFrank Piva 	ret = ublksrv_json_read_target_str_info(jbuf, PATH_MAX, "backing_file", file);
51*94c4a1e1SFrank Piva 	if (ret < 0) {
52*94c4a1e1SFrank Piva 		ublk_err( "%s: backing file can't be retrieved from jbuf %d\n",
53*94c4a1e1SFrank Piva 				__func__, ret);
54*94c4a1e1SFrank Piva 		return ret;
55*94c4a1e1SFrank Piva 	}
56*94c4a1e1SFrank Piva 
57*94c4a1e1SFrank Piva 	ret = ublksrv_json_read_target_ulong_info(jbuf, "direct_io",
58*94c4a1e1SFrank Piva 			&direct_io);
59*94c4a1e1SFrank Piva 	if (ret) {
60*94c4a1e1SFrank Piva 		ublk_err( "%s: read target direct_io failed %d\n",
61*94c4a1e1SFrank Piva 				__func__, ret);
62*94c4a1e1SFrank Piva 		return ret;
63*94c4a1e1SFrank Piva 	}
64*94c4a1e1SFrank Piva 
65*94c4a1e1SFrank Piva 	ret = ublksrv_json_read_params(&p, jbuf);
66*94c4a1e1SFrank Piva 	if (ret) {
67*94c4a1e1SFrank Piva 		ublk_err( "%s: read ublk params failed %d\n",
68*94c4a1e1SFrank Piva 				__func__, ret);
69*94c4a1e1SFrank Piva 		return ret;
70*94c4a1e1SFrank Piva 	}
71*94c4a1e1SFrank Piva 
72*94c4a1e1SFrank Piva 	fd = open(file, O_RDWR);
73*94c4a1e1SFrank Piva 	if (fd < 0) {
74*94c4a1e1SFrank Piva 		ublk_err( "%s: backing file %s can't be opened\n",
75*94c4a1e1SFrank Piva 				__func__, file);
76*94c4a1e1SFrank Piva 		return fd;
77*94c4a1e1SFrank Piva 	}
78*94c4a1e1SFrank Piva 
79*94c4a1e1SFrank Piva 	if (direct_io)
80*94c4a1e1SFrank Piva 		fcntl(fd, F_SETFL, O_DIRECT);
81*94c4a1e1SFrank Piva 
82*94c4a1e1SFrank Piva 	ublksrv_tgt_set_io_data_size(tgt);
83*94c4a1e1SFrank Piva 	tgt->dev_size = p.basic.dev_sectors << 9;
84*94c4a1e1SFrank Piva 	tgt->tgt_ring_depth = info->queue_depth;
85*94c4a1e1SFrank Piva 	tgt->nr_fds = 1;
86*94c4a1e1SFrank Piva 	tgt->fds[1] = fd;
87*94c4a1e1SFrank Piva 	user_copy = info->flags & UBLK_F_USER_COPY;
88*94c4a1e1SFrank Piva 	if (user_copy)
89*94c4a1e1SFrank Piva 		tgt->tgt_ring_depth *= 2;
90*94c4a1e1SFrank Piva 
91*94c4a1e1SFrank Piva 	return 0;
92*94c4a1e1SFrank Piva }
93*94c4a1e1SFrank Piva 
loop_recovery_tgt(struct ublksrv_dev * dev,int type)94*94c4a1e1SFrank Piva static int loop_recovery_tgt(struct ublksrv_dev *dev, int type)
95*94c4a1e1SFrank Piva {
96*94c4a1e1SFrank Piva 	const struct ublksrv_ctrl_dev *cdev = ublksrv_get_ctrl_dev(dev);
97*94c4a1e1SFrank Piva 	const struct ublksrv_ctrl_dev_info *info =
98*94c4a1e1SFrank Piva 		ublksrv_ctrl_get_dev_info(ublksrv_get_ctrl_dev(dev));
99*94c4a1e1SFrank Piva 	const char *jbuf = ublksrv_ctrl_get_recovery_jbuf(cdev);
100*94c4a1e1SFrank Piva 
101*94c4a1e1SFrank Piva 	ublk_assert(type == UBLKSRV_TGT_TYPE_LOOP);
102*94c4a1e1SFrank Piva 	ublk_assert(info->state == UBLK_S_DEV_QUIESCED);
103*94c4a1e1SFrank Piva 
104*94c4a1e1SFrank Piva 	return loop_setup_tgt(dev, type, true, jbuf);
105*94c4a1e1SFrank Piva }
106*94c4a1e1SFrank Piva 
loop_init_tgt(struct ublksrv_dev * dev,int type,int argc,char * argv[])107*94c4a1e1SFrank Piva static int loop_init_tgt(struct ublksrv_dev *dev, int type, int argc, char
108*94c4a1e1SFrank Piva 		*argv[])
109*94c4a1e1SFrank Piva {
110*94c4a1e1SFrank Piva 	int buffered_io = 0;
111*94c4a1e1SFrank Piva 	const struct ublksrv_ctrl_dev_info *info =
112*94c4a1e1SFrank Piva 		ublksrv_ctrl_get_dev_info(ublksrv_get_ctrl_dev(dev));
113*94c4a1e1SFrank Piva 	static const struct option lo_longopts[] = {
114*94c4a1e1SFrank Piva 		{ "file",		1,	NULL, 'f' },
115*94c4a1e1SFrank Piva 		{ "buffered_io",	no_argument, &buffered_io, 1},
116*94c4a1e1SFrank Piva 		{ NULL }
117*94c4a1e1SFrank Piva 	};
118*94c4a1e1SFrank Piva 	unsigned long long bytes;
119*94c4a1e1SFrank Piva 	struct stat st;
120*94c4a1e1SFrank Piva 	int fd, opt;
121*94c4a1e1SFrank Piva 	char *file = NULL;
122*94c4a1e1SFrank Piva 	int jbuf_size;
123*94c4a1e1SFrank Piva 	char *jbuf;
124*94c4a1e1SFrank Piva 	struct ublksrv_tgt_base_json tgt_json = {
125*94c4a1e1SFrank Piva 		.type = type,
126*94c4a1e1SFrank Piva 	};
127*94c4a1e1SFrank Piva 	struct ublk_params p = {
128*94c4a1e1SFrank Piva 		.types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DISCARD,
129*94c4a1e1SFrank Piva 		.basic = {
130*94c4a1e1SFrank Piva 			.logical_bs_shift	= 9,
131*94c4a1e1SFrank Piva 			.physical_bs_shift	= 12,
132*94c4a1e1SFrank Piva 			.io_opt_shift	= 12,
133*94c4a1e1SFrank Piva 			.io_min_shift	= 9,
134*94c4a1e1SFrank Piva 			.max_sectors		= info->max_io_buf_bytes >> 9,
135*94c4a1e1SFrank Piva 		},
136*94c4a1e1SFrank Piva 
137*94c4a1e1SFrank Piva 		.discard = {
138*94c4a1e1SFrank Piva 			.max_discard_sectors	= UINT_MAX >> 9,
139*94c4a1e1SFrank Piva 			.max_discard_segments	= 1,
140*94c4a1e1SFrank Piva 		},
141*94c4a1e1SFrank Piva 	};
142*94c4a1e1SFrank Piva 	bool can_discard = false;
143*94c4a1e1SFrank Piva 
144*94c4a1e1SFrank Piva 	strcpy(tgt_json.name, "loop");
145*94c4a1e1SFrank Piva 
146*94c4a1e1SFrank Piva 	if (type != UBLKSRV_TGT_TYPE_LOOP)
147*94c4a1e1SFrank Piva 		return -1;
148*94c4a1e1SFrank Piva 
149*94c4a1e1SFrank Piva 	while ((opt = getopt_long(argc, argv, "-:f:",
150*94c4a1e1SFrank Piva 				  lo_longopts, NULL)) != -1) {
151*94c4a1e1SFrank Piva 		switch (opt) {
152*94c4a1e1SFrank Piva 		case 'f':
153*94c4a1e1SFrank Piva 			file = strdup(optarg);
154*94c4a1e1SFrank Piva 			break;
155*94c4a1e1SFrank Piva 		}
156*94c4a1e1SFrank Piva 	}
157*94c4a1e1SFrank Piva 
158*94c4a1e1SFrank Piva 	if (!file)
159*94c4a1e1SFrank Piva 		return -1;
160*94c4a1e1SFrank Piva 
161*94c4a1e1SFrank Piva 	fd = open(file, O_RDWR);
162*94c4a1e1SFrank Piva 	if (fd < 0) {
163*94c4a1e1SFrank Piva 		ublk_err( "%s: backing file %s can't be opened\n",
164*94c4a1e1SFrank Piva 				__func__, file);
165*94c4a1e1SFrank Piva 		return -2;
166*94c4a1e1SFrank Piva 	}
167*94c4a1e1SFrank Piva 
168*94c4a1e1SFrank Piva 	if (fstat(fd, &st) < 0)
169*94c4a1e1SFrank Piva 		return -2;
170*94c4a1e1SFrank Piva 
171*94c4a1e1SFrank Piva 	if (S_ISBLK(st.st_mode)) {
172*94c4a1e1SFrank Piva 		unsigned int bs, pbs;
173*94c4a1e1SFrank Piva 
174*94c4a1e1SFrank Piva 		if (ioctl(fd, BLKGETSIZE64, &bytes) != 0)
175*94c4a1e1SFrank Piva 			return -1;
176*94c4a1e1SFrank Piva 		if (ioctl(fd, BLKSSZGET, &bs) != 0)
177*94c4a1e1SFrank Piva 			return -1;
178*94c4a1e1SFrank Piva 		if (ioctl(fd, BLKPBSZGET, &pbs) != 0)
179*94c4a1e1SFrank Piva 			return -1;
180*94c4a1e1SFrank Piva 		block_device = true;
181*94c4a1e1SFrank Piva 		p.basic.logical_bs_shift = ilog2(bs);
182*94c4a1e1SFrank Piva 		p.basic.physical_bs_shift = ilog2(pbs);
183*94c4a1e1SFrank Piva 		can_discard = backing_supports_discard(file);
184*94c4a1e1SFrank Piva 	} else if (S_ISREG(st.st_mode)) {
185*94c4a1e1SFrank Piva 		block_device = false;
186*94c4a1e1SFrank Piva 		bytes = st.st_size;
187*94c4a1e1SFrank Piva 		can_discard = true;
188*94c4a1e1SFrank Piva 		p.basic.logical_bs_shift = ilog2(st.st_blksize);
189*94c4a1e1SFrank Piva 		p.basic.physical_bs_shift = ilog2(st.st_blksize);
190*94c4a1e1SFrank Piva 	} else {
191*94c4a1e1SFrank Piva 		bytes = 0;
192*94c4a1e1SFrank Piva 	}
193*94c4a1e1SFrank Piva 
194*94c4a1e1SFrank Piva 	/*
195*94c4a1e1SFrank Piva 	 * in case of buffered io, use common bs/pbs so that all FS
196*94c4a1e1SFrank Piva 	 * image can be supported
197*94c4a1e1SFrank Piva 	 */
198*94c4a1e1SFrank Piva 	if (buffered_io || !ublk_param_is_valid(&p) ||
199*94c4a1e1SFrank Piva 			fcntl(fd, F_SETFL, O_DIRECT)) {
200*94c4a1e1SFrank Piva 		p.basic.logical_bs_shift = 9;
201*94c4a1e1SFrank Piva 		p.basic.physical_bs_shift = 12;
202*94c4a1e1SFrank Piva 		buffered_io = 1;
203*94c4a1e1SFrank Piva 	}
204*94c4a1e1SFrank Piva 
205*94c4a1e1SFrank Piva 	tgt_json.dev_size = bytes;
206*94c4a1e1SFrank Piva 	p.basic.dev_sectors = bytes >> 9;
207*94c4a1e1SFrank Piva 
208*94c4a1e1SFrank Piva 	if (st.st_blksize && can_discard)
209*94c4a1e1SFrank Piva 		p.discard.discard_granularity = st.st_blksize;
210*94c4a1e1SFrank Piva 	else
211*94c4a1e1SFrank Piva 		p.types &= ~UBLK_PARAM_TYPE_DISCARD;
212*94c4a1e1SFrank Piva 
213*94c4a1e1SFrank Piva 	jbuf = ublksrv_tgt_realloc_json_buf(dev, &jbuf_size);
214*94c4a1e1SFrank Piva 	ublk_json_write_dev_info(dev, &jbuf, &jbuf_size);
215*94c4a1e1SFrank Piva 	ublk_json_write_target_base(dev, &jbuf, &jbuf_size, &tgt_json);
216*94c4a1e1SFrank Piva 	ublk_json_write_tgt_str(dev, &jbuf, &jbuf_size, "backing_file", file);
217*94c4a1e1SFrank Piva 	ublk_json_write_tgt_long(dev, &jbuf, &jbuf_size, "direct_io", !buffered_io);
218*94c4a1e1SFrank Piva 	ublk_json_write_params(dev, &jbuf, &jbuf_size, &p);
219*94c4a1e1SFrank Piva 
220*94c4a1e1SFrank Piva 	close(fd);
221*94c4a1e1SFrank Piva 
222*94c4a1e1SFrank Piva 	return loop_setup_tgt(dev, type, false, jbuf);
223*94c4a1e1SFrank Piva }
224*94c4a1e1SFrank Piva 
/* Print the "add" command line help of the loop target to stdout. */
static void loop_usage_for_add(void)
{
	fputs("           loop: -f backing_file [--buffered_io]\n"
	      "           	default is direct IO to backing file\n",
	      stdout);
}
230*94c4a1e1SFrank Piva 
loop_fallocate_mode(const struct ublksrv_io_desc * iod)231*94c4a1e1SFrank Piva static inline int loop_fallocate_mode(const struct ublksrv_io_desc *iod)
232*94c4a1e1SFrank Piva {
233*94c4a1e1SFrank Piva        __u16 ublk_op = ublksrv_get_op(iod);
234*94c4a1e1SFrank Piva        __u32 flags = ublksrv_get_flags(iod);
235*94c4a1e1SFrank Piva        int mode = FALLOC_FL_KEEP_SIZE;
236*94c4a1e1SFrank Piva 
237*94c4a1e1SFrank Piva        /* follow logic of linux kernel loop */
238*94c4a1e1SFrank Piva        if (ublk_op == UBLK_IO_OP_DISCARD) {
239*94c4a1e1SFrank Piva                mode |= FALLOC_FL_PUNCH_HOLE;
240*94c4a1e1SFrank Piva        } else if (ublk_op == UBLK_IO_OP_WRITE_ZEROES) {
241*94c4a1e1SFrank Piva                if (flags & UBLK_IO_F_NOUNMAP)
242*94c4a1e1SFrank Piva                        mode |= FALLOC_FL_ZERO_RANGE;
243*94c4a1e1SFrank Piva                else
244*94c4a1e1SFrank Piva                        mode |= FALLOC_FL_PUNCH_HOLE;
245*94c4a1e1SFrank Piva        } else {
246*94c4a1e1SFrank Piva                mode |= FALLOC_FL_ZERO_RANGE;
247*94c4a1e1SFrank Piva        }
248*94c4a1e1SFrank Piva 
249*94c4a1e1SFrank Piva        return mode;
250*94c4a1e1SFrank Piva }
251*94c4a1e1SFrank Piva 
loop_queue_tgt_read(const struct ublksrv_queue * q,const struct ublksrv_io_desc * iod,int tag)252*94c4a1e1SFrank Piva static void loop_queue_tgt_read(const struct ublksrv_queue *q,
253*94c4a1e1SFrank Piva 		const struct ublksrv_io_desc *iod, int tag)
254*94c4a1e1SFrank Piva {
255*94c4a1e1SFrank Piva 	unsigned ublk_op = ublksrv_get_op(iod);
256*94c4a1e1SFrank Piva 
257*94c4a1e1SFrank Piva 	if (user_copy) {
258*94c4a1e1SFrank Piva 		struct io_uring_sqe *sqe, *sqe2;
259*94c4a1e1SFrank Piva 		__u64 pos = ublk_pos(q->q_id, tag, 0);
260*94c4a1e1SFrank Piva 		void *buf = ublksrv_queue_get_io_buf(q, tag);
261*94c4a1e1SFrank Piva 
262*94c4a1e1SFrank Piva 		ublk_get_sqe_pair(q->ring_ptr, &sqe, &sqe2);
263*94c4a1e1SFrank Piva 		io_uring_prep_read(sqe, 1 /*fds[1]*/,
264*94c4a1e1SFrank Piva 				buf,
265*94c4a1e1SFrank Piva 				iod->nr_sectors << 9,
266*94c4a1e1SFrank Piva 				iod->start_sector << 9);
267*94c4a1e1SFrank Piva 		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE | IOSQE_IO_LINK);
268*94c4a1e1SFrank Piva 		sqe->user_data = build_user_data(tag, ublk_op, 1, 1);
269*94c4a1e1SFrank Piva 
270*94c4a1e1SFrank Piva 		io_uring_prep_write(sqe2, 0 /*fds[0]*/,
271*94c4a1e1SFrank Piva 				buf, iod->nr_sectors << 9, pos);
272*94c4a1e1SFrank Piva 		io_uring_sqe_set_flags(sqe2, IOSQE_FIXED_FILE);
273*94c4a1e1SFrank Piva 		/* bit63 marks us as tgt io */
274*94c4a1e1SFrank Piva 		sqe2->user_data = build_user_data(tag, ublk_op, 0, 1);
275*94c4a1e1SFrank Piva 	} else {
276*94c4a1e1SFrank Piva 		struct io_uring_sqe *sqe;
277*94c4a1e1SFrank Piva 		void *buf = (void *)iod->addr;
278*94c4a1e1SFrank Piva 
279*94c4a1e1SFrank Piva 		ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
280*94c4a1e1SFrank Piva 		io_uring_prep_read(sqe, 1 /*fds[1]*/,
281*94c4a1e1SFrank Piva 			buf,
282*94c4a1e1SFrank Piva 			iod->nr_sectors << 9,
283*94c4a1e1SFrank Piva 			iod->start_sector << 9);
284*94c4a1e1SFrank Piva 		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
285*94c4a1e1SFrank Piva 		sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
286*94c4a1e1SFrank Piva 	}
287*94c4a1e1SFrank Piva }
288*94c4a1e1SFrank Piva 
loop_queue_tgt_write(const struct ublksrv_queue * q,const struct ublksrv_io_desc * iod,int tag)289*94c4a1e1SFrank Piva static void loop_queue_tgt_write(const struct ublksrv_queue *q,
290*94c4a1e1SFrank Piva 		const struct ublksrv_io_desc *iod, int tag)
291*94c4a1e1SFrank Piva {
292*94c4a1e1SFrank Piva 	unsigned ublk_op = ublksrv_get_op(iod);
293*94c4a1e1SFrank Piva 
294*94c4a1e1SFrank Piva 	if (user_copy) {
295*94c4a1e1SFrank Piva 		struct io_uring_sqe *sqe, *sqe2;
296*94c4a1e1SFrank Piva 		__u64 pos = ublk_pos(q->q_id, tag, 0);
297*94c4a1e1SFrank Piva 		void *buf = ublksrv_queue_get_io_buf(q, tag);
298*94c4a1e1SFrank Piva 
299*94c4a1e1SFrank Piva 		ublk_get_sqe_pair(q->ring_ptr, &sqe, &sqe2);
300*94c4a1e1SFrank Piva 		io_uring_prep_read(sqe, 0 /*fds[0]*/,
301*94c4a1e1SFrank Piva 			buf, iod->nr_sectors << 9, pos);
302*94c4a1e1SFrank Piva 		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE | IOSQE_IO_LINK);
303*94c4a1e1SFrank Piva 		sqe->user_data = build_user_data(tag, ublk_op, 1, 1);
304*94c4a1e1SFrank Piva 
305*94c4a1e1SFrank Piva 		io_uring_prep_write(sqe2, 1 /*fds[1]*/,
306*94c4a1e1SFrank Piva 			buf, iod->nr_sectors << 9,
307*94c4a1e1SFrank Piva 			iod->start_sector << 9);
308*94c4a1e1SFrank Piva 		io_uring_sqe_set_flags(sqe2, IOSQE_FIXED_FILE);
309*94c4a1e1SFrank Piva 		/* bit63 marks us as tgt io */
310*94c4a1e1SFrank Piva 		sqe2->user_data = build_user_data(tag, ublk_op, 0, 1);
311*94c4a1e1SFrank Piva 	} else {
312*94c4a1e1SFrank Piva 		struct io_uring_sqe *sqe;
313*94c4a1e1SFrank Piva 		void *buf = (void *)iod->addr;
314*94c4a1e1SFrank Piva 
315*94c4a1e1SFrank Piva 		ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
316*94c4a1e1SFrank Piva 		io_uring_prep_write(sqe, 1 /*fds[1]*/,
317*94c4a1e1SFrank Piva 			buf,
318*94c4a1e1SFrank Piva 			iod->nr_sectors << 9,
319*94c4a1e1SFrank Piva 			iod->start_sector << 9);
320*94c4a1e1SFrank Piva 		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
321*94c4a1e1SFrank Piva 		/* bit63 marks us as tgt io */
322*94c4a1e1SFrank Piva 		sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
323*94c4a1e1SFrank Piva 	}
324*94c4a1e1SFrank Piva }
325*94c4a1e1SFrank Piva 
loop_queue_tgt_io(const struct ublksrv_queue * q,const struct ublk_io_data * data,int tag)326*94c4a1e1SFrank Piva static int loop_queue_tgt_io(const struct ublksrv_queue *q,
327*94c4a1e1SFrank Piva 		const struct ublk_io_data *data, int tag)
328*94c4a1e1SFrank Piva {
329*94c4a1e1SFrank Piva 	const struct ublksrv_io_desc *iod = data->iod;
330*94c4a1e1SFrank Piva 	struct io_uring_sqe *sqe;
331*94c4a1e1SFrank Piva 	unsigned ublk_op = ublksrv_get_op(iod);
332*94c4a1e1SFrank Piva 
333*94c4a1e1SFrank Piva 	switch (ublk_op) {
334*94c4a1e1SFrank Piva 	case UBLK_IO_OP_FLUSH:
335*94c4a1e1SFrank Piva 		ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
336*94c4a1e1SFrank Piva 		io_uring_prep_sync_file_range(sqe, 1 /*fds[1]*/,
337*94c4a1e1SFrank Piva 				iod->nr_sectors << 9,
338*94c4a1e1SFrank Piva 				iod->start_sector << 9,
339*94c4a1e1SFrank Piva 				IORING_FSYNC_DATASYNC);
340*94c4a1e1SFrank Piva 		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
341*94c4a1e1SFrank Piva 		/* bit63 marks us as tgt io */
342*94c4a1e1SFrank Piva 		sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
343*94c4a1e1SFrank Piva 		break;
344*94c4a1e1SFrank Piva 	case UBLK_IO_OP_WRITE_ZEROES:
345*94c4a1e1SFrank Piva 	case UBLK_IO_OP_DISCARD:
346*94c4a1e1SFrank Piva 		ublk_get_sqe_pair(q->ring_ptr, &sqe, NULL);
347*94c4a1e1SFrank Piva 		io_uring_prep_fallocate(sqe, 1 /*fds[1]*/,
348*94c4a1e1SFrank Piva 				loop_fallocate_mode(iod),
349*94c4a1e1SFrank Piva 				iod->start_sector << 9,
350*94c4a1e1SFrank Piva 				iod->nr_sectors << 9);
351*94c4a1e1SFrank Piva 		io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
352*94c4a1e1SFrank Piva 		/* bit63 marks us as tgt io */
353*94c4a1e1SFrank Piva 		sqe->user_data = build_user_data(tag, ublk_op, 0, 1);
354*94c4a1e1SFrank Piva 		break;
355*94c4a1e1SFrank Piva 	case UBLK_IO_OP_READ:
356*94c4a1e1SFrank Piva 		loop_queue_tgt_read(q, iod, tag);
357*94c4a1e1SFrank Piva 		break;
358*94c4a1e1SFrank Piva 	case UBLK_IO_OP_WRITE:
359*94c4a1e1SFrank Piva 		loop_queue_tgt_write(q, iod, tag);
360*94c4a1e1SFrank Piva 		break;
361*94c4a1e1SFrank Piva 	default:
362*94c4a1e1SFrank Piva 		return -EINVAL;
363*94c4a1e1SFrank Piva 	}
364*94c4a1e1SFrank Piva 
365*94c4a1e1SFrank Piva 	ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u\n", __func__, tag,
366*94c4a1e1SFrank Piva 			iod->op_flags, iod->start_sector, iod->nr_sectors << 9);
367*94c4a1e1SFrank Piva 
368*94c4a1e1SFrank Piva 	return 1;
369*94c4a1e1SFrank Piva }
370*94c4a1e1SFrank Piva 
__loop_handle_io_async(const struct ublksrv_queue * q,const struct ublk_io_data * data,int tag)371*94c4a1e1SFrank Piva static co_io_job __loop_handle_io_async(const struct ublksrv_queue *q,
372*94c4a1e1SFrank Piva 		const struct ublk_io_data *data, int tag)
373*94c4a1e1SFrank Piva {
374*94c4a1e1SFrank Piva 	int ret;
375*94c4a1e1SFrank Piva 	struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);
376*94c4a1e1SFrank Piva 
377*94c4a1e1SFrank Piva 	io->queued_tgt_io = 0;
378*94c4a1e1SFrank Piva  again:
379*94c4a1e1SFrank Piva 	ret = loop_queue_tgt_io(q, data, tag);
380*94c4a1e1SFrank Piva 	if (ret > 0) {
381*94c4a1e1SFrank Piva 		if (io->queued_tgt_io)
382*94c4a1e1SFrank Piva 			ublk_err("bad queued_tgt_io %d\n", io->queued_tgt_io);
383*94c4a1e1SFrank Piva 		io->queued_tgt_io += 1;
384*94c4a1e1SFrank Piva 
385*94c4a1e1SFrank Piva 		co_await__suspend_always(tag);
386*94c4a1e1SFrank Piva 		io->queued_tgt_io -= 1;
387*94c4a1e1SFrank Piva 
388*94c4a1e1SFrank Piva 		if (io->tgt_io_cqe->res == -EAGAIN)
389*94c4a1e1SFrank Piva 			goto again;
390*94c4a1e1SFrank Piva 
391*94c4a1e1SFrank Piva 		ublksrv_complete_io(q, tag, io->tgt_io_cqe->res);
392*94c4a1e1SFrank Piva 	} else if (ret < 0) {
393*94c4a1e1SFrank Piva 		ublk_err( "fail to queue io %d, ret %d\n", tag, tag);
394*94c4a1e1SFrank Piva 	} else {
395*94c4a1e1SFrank Piva 		ublk_err( "no sqe %d\n", tag);
396*94c4a1e1SFrank Piva 	}
397*94c4a1e1SFrank Piva }
398*94c4a1e1SFrank Piva 
loop_handle_io_async(const struct ublksrv_queue * q,const struct ublk_io_data * data)399*94c4a1e1SFrank Piva static int loop_handle_io_async(const struct ublksrv_queue *q,
400*94c4a1e1SFrank Piva 		const struct ublk_io_data *data)
401*94c4a1e1SFrank Piva {
402*94c4a1e1SFrank Piva 	struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);
403*94c4a1e1SFrank Piva 
404*94c4a1e1SFrank Piva 	if (block_device && ublksrv_get_op(data->iod) == UBLK_IO_OP_DISCARD) {
405*94c4a1e1SFrank Piva 		__u64 r[2];
406*94c4a1e1SFrank Piva 		int res;
407*94c4a1e1SFrank Piva 
408*94c4a1e1SFrank Piva 		io_uring_submit(q->ring_ptr);
409*94c4a1e1SFrank Piva 
410*94c4a1e1SFrank Piva 		r[0] = data->iod->start_sector << 9;
411*94c4a1e1SFrank Piva 		r[1] = data->iod->nr_sectors << 9;
412*94c4a1e1SFrank Piva 		res = ioctl(q->dev->tgt.fds[1], BLKDISCARD, &r);
413*94c4a1e1SFrank Piva 		ublksrv_complete_io(q, data->tag, res);
414*94c4a1e1SFrank Piva 	} else {
415*94c4a1e1SFrank Piva 		io->co = __loop_handle_io_async(q, data, data->tag);
416*94c4a1e1SFrank Piva 	}
417*94c4a1e1SFrank Piva 	return 0;
418*94c4a1e1SFrank Piva }
419*94c4a1e1SFrank Piva 
loop_tgt_io_done(const struct ublksrv_queue * q,const struct ublk_io_data * data,const struct io_uring_cqe * cqe)420*94c4a1e1SFrank Piva static void loop_tgt_io_done(const struct ublksrv_queue *q,
421*94c4a1e1SFrank Piva 		const struct ublk_io_data *data,
422*94c4a1e1SFrank Piva 		const struct io_uring_cqe *cqe)
423*94c4a1e1SFrank Piva {
424*94c4a1e1SFrank Piva 	int tag = user_data_to_tag(cqe->user_data);
425*94c4a1e1SFrank Piva 	struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);
426*94c4a1e1SFrank Piva 
427*94c4a1e1SFrank Piva 	if (user_data_to_tgt_data(cqe->user_data))
428*94c4a1e1SFrank Piva 		return;
429*94c4a1e1SFrank Piva 
430*94c4a1e1SFrank Piva 	ublk_assert(tag == data->tag);
431*94c4a1e1SFrank Piva 	if (!io->queued_tgt_io)
432*94c4a1e1SFrank Piva 		ublk_err("%s: wrong queued_tgt_io: res %d qid %u tag %u, cmd_op %u\n",
433*94c4a1e1SFrank Piva 			__func__, cqe->res, q->q_id,
434*94c4a1e1SFrank Piva 			user_data_to_tag(cqe->user_data),
435*94c4a1e1SFrank Piva 			user_data_to_op(cqe->user_data));
436*94c4a1e1SFrank Piva 	io->tgt_io_cqe = cqe;
437*94c4a1e1SFrank Piva 	io->co.resume();
438*94c4a1e1SFrank Piva }
439*94c4a1e1SFrank Piva 
loop_deinit_tgt(const struct ublksrv_dev * dev)440*94c4a1e1SFrank Piva static void loop_deinit_tgt(const struct ublksrv_dev *dev)
441*94c4a1e1SFrank Piva {
442*94c4a1e1SFrank Piva 	fsync(dev->tgt.fds[1]);
443*94c4a1e1SFrank Piva 	close(dev->tgt.fds[1]);
444*94c4a1e1SFrank Piva }
445*94c4a1e1SFrank Piva 
446*94c4a1e1SFrank Piva struct ublksrv_tgt_type  loop_tgt_type = {
447*94c4a1e1SFrank Piva 	.handle_io_async = loop_handle_io_async,
448*94c4a1e1SFrank Piva 	.tgt_io_done = loop_tgt_io_done,
449*94c4a1e1SFrank Piva 	.usage_for_add	=  loop_usage_for_add,
450*94c4a1e1SFrank Piva 	.init_tgt = loop_init_tgt,
451*94c4a1e1SFrank Piva 	.deinit_tgt	=  loop_deinit_tgt,
452*94c4a1e1SFrank Piva 	.type	= UBLKSRV_TGT_TYPE_LOOP,
453*94c4a1e1SFrank Piva 	.name	=  "loop",
454*94c4a1e1SFrank Piva 	.recovery_tgt = loop_recovery_tgt,
455*94c4a1e1SFrank Piva };
456*94c4a1e1SFrank Piva 
457*94c4a1e1SFrank Piva static void tgt_loop_init() __attribute__((constructor));
458*94c4a1e1SFrank Piva 
tgt_loop_init(void)459*94c4a1e1SFrank Piva static void tgt_loop_init(void)
460*94c4a1e1SFrank Piva {
461*94c4a1e1SFrank Piva 	ublksrv_register_tgt_type(&loop_tgt_type);
462*94c4a1e1SFrank Piva }
463