1*94c4a1e1SFrank Piva // SPDX-License-Identifier: GPL-2.0
2*94c4a1e1SFrank Piva #include "ublksrv_tgt.h"
3*94c4a1e1SFrank Piva #include "qcow2_format.h"
4*94c4a1e1SFrank Piva #include "qcow2.h"
5*94c4a1e1SFrank Piva
6*94c4a1e1SFrank Piva #define HEADER_SIZE 512
7*94c4a1e1SFrank Piva #define QCOW2_UNMAPPED (u64)(-1)
8*94c4a1e1SFrank Piva
qcow2_init_tgt(struct ublksrv_dev * dev,int type,int argc,char * argv[])9*94c4a1e1SFrank Piva static int qcow2_init_tgt(struct ublksrv_dev *dev, int type, int argc, char
10*94c4a1e1SFrank Piva *argv[])
11*94c4a1e1SFrank Piva {
12*94c4a1e1SFrank Piva struct ublksrv_tgt_info *tgt = &dev->tgt;
13*94c4a1e1SFrank Piva const struct ublksrv_ctrl_dev_info *info =
14*94c4a1e1SFrank Piva ublksrv_ctrl_get_dev_info(ublksrv_get_ctrl_dev(dev));
15*94c4a1e1SFrank Piva static const struct option lo_longopts[] = {
16*94c4a1e1SFrank Piva { "file", 1, NULL, 'f' },
17*94c4a1e1SFrank Piva { NULL }
18*94c4a1e1SFrank Piva };
19*94c4a1e1SFrank Piva int jbuf_size;
20*94c4a1e1SFrank Piva char *jbuf;
21*94c4a1e1SFrank Piva int fd, opt, ret;
22*94c4a1e1SFrank Piva void *header_buf;
23*94c4a1e1SFrank Piva QCowHeader *header;
24*94c4a1e1SFrank Piva char *file = NULL;
25*94c4a1e1SFrank Piva struct ublksrv_tgt_base_json tgt_json = {
26*94c4a1e1SFrank Piva .type = type,
27*94c4a1e1SFrank Piva };
28*94c4a1e1SFrank Piva struct ublk_params p = {
29*94c4a1e1SFrank Piva .types = UBLK_PARAM_TYPE_BASIC,
30*94c4a1e1SFrank Piva .basic = {
31*94c4a1e1SFrank Piva //.attrs = UBLK_ATTR_READ_ONLY,
32*94c4a1e1SFrank Piva .logical_bs_shift = 9,
33*94c4a1e1SFrank Piva .physical_bs_shift = 12,
34*94c4a1e1SFrank Piva .io_opt_shift = 12,
35*94c4a1e1SFrank Piva .io_min_shift = 9,
36*94c4a1e1SFrank Piva .max_sectors = info->max_io_buf_bytes >> 9,
37*94c4a1e1SFrank Piva },
38*94c4a1e1SFrank Piva };
39*94c4a1e1SFrank Piva Qcow2State *qs;
40*94c4a1e1SFrank Piva
41*94c4a1e1SFrank Piva /* qcow2 doesn't support user copy yet */
42*94c4a1e1SFrank Piva if (info->flags & UBLK_F_USER_COPY)
43*94c4a1e1SFrank Piva return -EINVAL;
44*94c4a1e1SFrank Piva
45*94c4a1e1SFrank Piva //1024 queue depth is enough for qcow2, then we can store
46*94c4a1e1SFrank Piva //tag & l1 entry index in single u32 variable.
47*94c4a1e1SFrank Piva if (info->queue_depth > QCOW2_MAX_QUEUE_DEPTH)
48*94c4a1e1SFrank Piva return -EINVAL;
49*94c4a1e1SFrank Piva
50*94c4a1e1SFrank Piva //qcow2 target doesn't support MQ yet
51*94c4a1e1SFrank Piva if (info->nr_hw_queues > 1)
52*94c4a1e1SFrank Piva return -EINVAL;
53*94c4a1e1SFrank Piva
54*94c4a1e1SFrank Piva strcpy(tgt_json.name, "qcow2");
55*94c4a1e1SFrank Piva
56*94c4a1e1SFrank Piva if (type != UBLKSRV_TGT_TYPE_QCOW2)
57*94c4a1e1SFrank Piva return -EINVAL;
58*94c4a1e1SFrank Piva
59*94c4a1e1SFrank Piva while ((opt = getopt_long(argc, argv, "-:f:",
60*94c4a1e1SFrank Piva lo_longopts, NULL)) != -1) {
61*94c4a1e1SFrank Piva switch (opt) {
62*94c4a1e1SFrank Piva case 'f':
63*94c4a1e1SFrank Piva file = strdup(optarg);
64*94c4a1e1SFrank Piva break;
65*94c4a1e1SFrank Piva }
66*94c4a1e1SFrank Piva }
67*94c4a1e1SFrank Piva
68*94c4a1e1SFrank Piva if (!file)
69*94c4a1e1SFrank Piva return -EINVAL;
70*94c4a1e1SFrank Piva
71*94c4a1e1SFrank Piva if (posix_memalign((void **)&header_buf, 512, HEADER_SIZE))
72*94c4a1e1SFrank Piva return -EINVAL;
73*94c4a1e1SFrank Piva
74*94c4a1e1SFrank Piva header = (QCowHeader *)header_buf;
75*94c4a1e1SFrank Piva fd = open(file, O_RDWR);
76*94c4a1e1SFrank Piva if (fd < 0) {
77*94c4a1e1SFrank Piva ublk_err( "%s backing file %s can't be opened\n",
78*94c4a1e1SFrank Piva __func__, file);
79*94c4a1e1SFrank Piva return -EINVAL;
80*94c4a1e1SFrank Piva }
81*94c4a1e1SFrank Piva
82*94c4a1e1SFrank Piva if (fcntl(fd, F_SETFL, O_DIRECT))
83*94c4a1e1SFrank Piva ublk_err( "%s direct io on file %s isn't supported\n",
84*94c4a1e1SFrank Piva __func__, file);
85*94c4a1e1SFrank Piva
86*94c4a1e1SFrank Piva ret = read(fd, header_buf, HEADER_SIZE);
87*94c4a1e1SFrank Piva if (ret != HEADER_SIZE) {
88*94c4a1e1SFrank Piva ublk_err( "%s: return backing file %s %d %d\n",
89*94c4a1e1SFrank Piva __func__, file, HEADER_SIZE, ret);
90*94c4a1e1SFrank Piva return -EINVAL;
91*94c4a1e1SFrank Piva }
92*94c4a1e1SFrank Piva
93*94c4a1e1SFrank Piva if (be64_to_cpu(header->nb_snapshots) != 0) {
94*94c4a1e1SFrank Piva ublk_err( "%s: not support snapshots\n", __func__);
95*94c4a1e1SFrank Piva return -EINVAL;
96*94c4a1e1SFrank Piva }
97*94c4a1e1SFrank Piva
98*94c4a1e1SFrank Piva tgt_json.dev_size = tgt->dev_size = be64_to_cpu(header->size);
99*94c4a1e1SFrank Piva p.basic.dev_sectors = tgt->dev_size >> 9,
100*94c4a1e1SFrank Piva p.basic.chunk_sectors = 1 << (be32_to_cpu(header->cluster_bits) - 9);
101*94c4a1e1SFrank Piva tgt->tgt_ring_depth = info->queue_depth * 4;
102*94c4a1e1SFrank Piva tgt->extra_ios = QCOW2_PARA::META_MAX_TAGS;
103*94c4a1e1SFrank Piva tgt->iowq_max_workers[0] = 1;
104*94c4a1e1SFrank Piva tgt->nr_fds = 1;
105*94c4a1e1SFrank Piva tgt->fds[1] = fd;
106*94c4a1e1SFrank Piva tgt->tgt_data = qs = make_qcow2state(file, dev);
107*94c4a1e1SFrank Piva ublksrv_tgt_set_io_data_size(tgt);
108*94c4a1e1SFrank Piva
109*94c4a1e1SFrank Piva jbuf = ublksrv_tgt_realloc_json_buf(dev, &jbuf_size);
110*94c4a1e1SFrank Piva ublk_json_write_dev_info(dev, &jbuf, &jbuf_size);
111*94c4a1e1SFrank Piva ublk_json_write_target_base(dev, &jbuf, &jbuf_size, &tgt_json);
112*94c4a1e1SFrank Piva
113*94c4a1e1SFrank Piva ublk_json_write_params(dev, &jbuf, &jbuf_size, &p);
114*94c4a1e1SFrank Piva
115*94c4a1e1SFrank Piva ublk_json_write_tgt_str(dev, &jbuf, &jbuf_size,
116*94c4a1e1SFrank Piva "backing_file", file);
117*94c4a1e1SFrank Piva ublk_json_write_tgt_ulong(dev, &jbuf, &jbuf_size,
118*94c4a1e1SFrank Piva "version", qs->header.get_version());
119*94c4a1e1SFrank Piva ublk_json_write_tgt_ulong(dev, &jbuf, &jbuf_size,
120*94c4a1e1SFrank Piva "cluster_bits", qs->header.get_cluster_bits());
121*94c4a1e1SFrank Piva ublk_json_write_tgt_ulong(dev, &jbuf, &jbuf_size,
122*94c4a1e1SFrank Piva "header_length", qs->header.get_header_length());
123*94c4a1e1SFrank Piva ublk_json_write_tgt_ulong(dev, &jbuf, &jbuf_size,
124*94c4a1e1SFrank Piva "l1_size", qs->header.get_l1_size());
125*94c4a1e1SFrank Piva ublk_json_write_tgt_ulong(dev, &jbuf, &jbuf_size,
126*94c4a1e1SFrank Piva "refcount_table_clusters",
127*94c4a1e1SFrank Piva qs->header.get_refcount_table_clusters());
128*94c4a1e1SFrank Piva ublk_json_write_tgt_ulong(dev, &jbuf, &jbuf_size,
129*94c4a1e1SFrank Piva "refcount_order", qs->header.get_refcount_order());
130*94c4a1e1SFrank Piva
131*94c4a1e1SFrank Piva qs->header.dump_ext();
132*94c4a1e1SFrank Piva
133*94c4a1e1SFrank Piva return 0;
134*94c4a1e1SFrank Piva }
135*94c4a1e1SFrank Piva
qcow2_recovery_tgt(struct ublksrv_dev * dev,int type)136*94c4a1e1SFrank Piva static int qcow2_recovery_tgt(struct ublksrv_dev *dev, int type)
137*94c4a1e1SFrank Piva {
138*94c4a1e1SFrank Piva const struct ublksrv_ctrl_dev *cdev = ublksrv_get_ctrl_dev(dev);
139*94c4a1e1SFrank Piva const char *jbuf = ublksrv_ctrl_get_recovery_jbuf(cdev);
140*94c4a1e1SFrank Piva const struct ublksrv_ctrl_dev_info *info =
141*94c4a1e1SFrank Piva ublksrv_ctrl_get_dev_info(cdev);
142*94c4a1e1SFrank Piva struct ublksrv_tgt_info *tgt = &dev->tgt;
143*94c4a1e1SFrank Piva int fd, ret;
144*94c4a1e1SFrank Piva char file[PATH_MAX];
145*94c4a1e1SFrank Piva struct ublk_params p;
146*94c4a1e1SFrank Piva int tgt_depth;
147*94c4a1e1SFrank Piva
148*94c4a1e1SFrank Piva ublk_assert(jbuf);
149*94c4a1e1SFrank Piva ublk_assert(info->state == UBLK_S_DEV_QUIESCED);
150*94c4a1e1SFrank Piva ublk_assert(type == UBLKSRV_TGT_TYPE_QCOW2);
151*94c4a1e1SFrank Piva
152*94c4a1e1SFrank Piva /* qcow2 doesn't support user copy yet */
153*94c4a1e1SFrank Piva if (info->flags & UBLK_F_USER_COPY)
154*94c4a1e1SFrank Piva return -EINVAL;
155*94c4a1e1SFrank Piva
156*94c4a1e1SFrank Piva ret = ublksrv_json_read_target_str_info(jbuf, PATH_MAX, "backing_file", file);
157*94c4a1e1SFrank Piva if (ret < 0) {
158*94c4a1e1SFrank Piva ublk_err( "%s: backing file can't be retrieved from jbuf %d\n",
159*94c4a1e1SFrank Piva __func__, ret);
160*94c4a1e1SFrank Piva return ret;
161*94c4a1e1SFrank Piva }
162*94c4a1e1SFrank Piva
163*94c4a1e1SFrank Piva ret = ublksrv_json_read_params(&p, jbuf);
164*94c4a1e1SFrank Piva if (ret) {
165*94c4a1e1SFrank Piva ublk_err( "%s: read ublk params failed %d\n",
166*94c4a1e1SFrank Piva __func__, ret);
167*94c4a1e1SFrank Piva return ret;
168*94c4a1e1SFrank Piva }
169*94c4a1e1SFrank Piva
170*94c4a1e1SFrank Piva fd = open(file, O_RDWR);
171*94c4a1e1SFrank Piva if (fd < 0) {
172*94c4a1e1SFrank Piva ublk_err( "%s: backing file %s can't be opened\n",
173*94c4a1e1SFrank Piva __func__, file);
174*94c4a1e1SFrank Piva return fd;
175*94c4a1e1SFrank Piva }
176*94c4a1e1SFrank Piva if (fcntl(fd, F_SETFL, O_DIRECT))
177*94c4a1e1SFrank Piva ublk_err( "%s direct io on file %s isn't supported\n",
178*94c4a1e1SFrank Piva __func__, file);
179*94c4a1e1SFrank Piva
180*94c4a1e1SFrank Piva tgt_depth = QCOW2_PARA::META_MAX_TAGS > info->queue_depth * 2 ?
181*94c4a1e1SFrank Piva QCOW2_PARA::META_MAX_TAGS : info->queue_depth * 2;
182*94c4a1e1SFrank Piva tgt->dev_size = p.basic.dev_sectors << 9;
183*94c4a1e1SFrank Piva tgt->extra_ios = QCOW2_PARA::META_MAX_TAGS;
184*94c4a1e1SFrank Piva tgt->tgt_ring_depth = tgt_depth;
185*94c4a1e1SFrank Piva tgt->iowq_max_workers[0] = 1;
186*94c4a1e1SFrank Piva tgt->nr_fds = 1;
187*94c4a1e1SFrank Piva tgt->fds[1] = fd;
188*94c4a1e1SFrank Piva tgt->tgt_data = make_qcow2state(file, dev);
189*94c4a1e1SFrank Piva ublksrv_tgt_set_io_data_size(tgt);
190*94c4a1e1SFrank Piva
191*94c4a1e1SFrank Piva return 0;
192*94c4a1e1SFrank Piva }
193*94c4a1e1SFrank Piva
/* Print the qcow2-specific part of the "add" command usage text. */
static void qcow2_usage_for_add(void)
{
	printf(" qcow2: -f backing_file\n");
}
198*94c4a1e1SFrank Piva
199*94c4a1e1SFrank Piva /* todo: flush meta dirty data */
qcow2_queue_tgt_fsync(const struct ublksrv_queue * q,unsigned io_op,int tag,u32 len,u64 offset)200*94c4a1e1SFrank Piva static inline int qcow2_queue_tgt_fsync(const struct ublksrv_queue *q,
201*94c4a1e1SFrank Piva unsigned io_op, int tag, u32 len, u64 offset)
202*94c4a1e1SFrank Piva {
203*94c4a1e1SFrank Piva int fd = q->dev->tgt.fds[1];
204*94c4a1e1SFrank Piva struct io_uring_sqe *sqe = io_uring_get_sqe(q->ring_ptr);
205*94c4a1e1SFrank Piva
206*94c4a1e1SFrank Piva if (!sqe) {
207*94c4a1e1SFrank Piva ublk_err("%s: tag %d offset %lx op %d, no sqe\n",
208*94c4a1e1SFrank Piva __func__, tag, offset, io_op);
209*94c4a1e1SFrank Piva return -ENOMEM;
210*94c4a1e1SFrank Piva }
211*94c4a1e1SFrank Piva
212*94c4a1e1SFrank Piva io_uring_prep_sync_file_range(sqe, fd, len ,offset,
213*94c4a1e1SFrank Piva IORING_FSYNC_DATASYNC);
214*94c4a1e1SFrank Piva sqe->user_data = build_user_data(tag, io_op, 0, 1);
215*94c4a1e1SFrank Piva qcow2_io_log("%s: queue io op %d(%llu %llx %llx)"
216*94c4a1e1SFrank Piva " (qid %d tag %u, cmd_op %u target: %d, user_data %llx)\n",
217*94c4a1e1SFrank Piva __func__, io_op, sqe->off, sqe->len, sqe->addr,
218*94c4a1e1SFrank Piva q->q_id, tag, io_op, 1, sqe->user_data);
219*94c4a1e1SFrank Piva return 1;
220*94c4a1e1SFrank Piva }
221*94c4a1e1SFrank Piva
qcow2_queue_tgt_zero_cluster(const Qcow2State * qs,const struct ublksrv_queue * q,int tag,u64 offset)222*94c4a1e1SFrank Piva static inline int qcow2_queue_tgt_zero_cluster(const Qcow2State *qs,
223*94c4a1e1SFrank Piva const struct ublksrv_queue *q, int tag, u64 offset)
224*94c4a1e1SFrank Piva {
225*94c4a1e1SFrank Piva int mode = FALLOC_FL_ZERO_RANGE;
226*94c4a1e1SFrank Piva int fd = q->dev->tgt.fds[1];
227*94c4a1e1SFrank Piva struct io_uring_sqe *sqe = io_uring_get_sqe(q->ring_ptr);
228*94c4a1e1SFrank Piva
229*94c4a1e1SFrank Piva if (!sqe) {
230*94c4a1e1SFrank Piva ublk_err("%s: tag %d offset %lx op %d, no sqe for zeroing\n",
231*94c4a1e1SFrank Piva __func__, tag, offset, IORING_OP_FALLOCATE);
232*94c4a1e1SFrank Piva return -ENOMEM;
233*94c4a1e1SFrank Piva }
234*94c4a1e1SFrank Piva
235*94c4a1e1SFrank Piva io_uring_prep_fallocate(sqe, fd, mode, offset,
236*94c4a1e1SFrank Piva (1ULL << qs->header.cluster_bits));
237*94c4a1e1SFrank Piva sqe->user_data = build_user_data(tag,
238*94c4a1e1SFrank Piva IORING_OP_FALLOCATE, 0, 1);
239*94c4a1e1SFrank Piva qcow2_io_log("%s: queue io op %d(%llx %llx %llx)"
240*94c4a1e1SFrank Piva " (qid %d tag %u, target: %d, user_data %llx)\n",
241*94c4a1e1SFrank Piva __func__, IORING_OP_FALLOCATE, offset,
242*94c4a1e1SFrank Piva sqe->len, sqe->addr, q->q_id, tag, 1, sqe->user_data);
243*94c4a1e1SFrank Piva return 1;
244*94c4a1e1SFrank Piva }
245*94c4a1e1SFrank Piva
qcow2_queue_tgt_rw_fast(const struct ublksrv_queue * q,unsigned io_op,int tag,u64 offset,const struct ublksrv_io_desc * iod)246*94c4a1e1SFrank Piva static inline int qcow2_queue_tgt_rw_fast(const struct ublksrv_queue *q,
247*94c4a1e1SFrank Piva unsigned io_op, int tag, u64 offset,
248*94c4a1e1SFrank Piva const struct ublksrv_io_desc *iod)
249*94c4a1e1SFrank Piva {
250*94c4a1e1SFrank Piva struct io_uring_sqe *sqe = io_uring_get_sqe(q->ring_ptr);
251*94c4a1e1SFrank Piva
252*94c4a1e1SFrank Piva if (!sqe) {
253*94c4a1e1SFrank Piva ublk_err("%s: tag %d offset %lx op %d, no sqe for rw\n",
254*94c4a1e1SFrank Piva __func__, tag, offset, io_op);
255*94c4a1e1SFrank Piva return -ENOMEM;
256*94c4a1e1SFrank Piva }
257*94c4a1e1SFrank Piva
258*94c4a1e1SFrank Piva io_uring_prep_rw(io_op, sqe, 1, (void *)iod->addr,
259*94c4a1e1SFrank Piva iod->nr_sectors << 9, offset);
260*94c4a1e1SFrank Piva sqe->flags = IOSQE_FIXED_FILE;
261*94c4a1e1SFrank Piva sqe->user_data = build_user_data(tag, io_op, 0, 1);
262*94c4a1e1SFrank Piva qcow2_io_log("%s: queue io op %d(%llu %llx %llx)"
263*94c4a1e1SFrank Piva " (qid %d tag %u, cmd_op %u target: %d, user_data %llx)\n",
264*94c4a1e1SFrank Piva __func__, io_op, sqe->off, sqe->len, sqe->addr,
265*94c4a1e1SFrank Piva q->q_id, tag, io_op, 1, sqe->user_data);
266*94c4a1e1SFrank Piva
267*94c4a1e1SFrank Piva return 1;
268*94c4a1e1SFrank Piva
269*94c4a1e1SFrank Piva }
270*94c4a1e1SFrank Piva
/*
 * Queue a read/write io, taking the cluster allocation state machine
 * into account.
 *
 * Returns the number of sqes queued (0 or 1), a negative errno on
 * failure, and may throw MetaUpdateException to park the io as a
 * waiter on a cluster that is currently being zeroed.
 * *expected_op tells the caller which cqe op completes this step
 * (the rw op itself, or IORING_OP_FALLOCATE when zeroing first).
 */
static inline int qcow2_queue_tgt_rw(const struct ublksrv_queue *q, unsigned io_op,
		int tag, u64 offset, const struct ublksrv_io_desc *iod,
		u32 *expected_op)
{
	Qcow2State *qs = queue_to_qcow2state(q);
	/* round down to the containing cluster boundary */
	u64 cluster_start = offset & ~((1ULL << qs->header.cluster_bits) - 1);
	Qcow2ClusterState *cs = qs->cluster_allocator.
		get_cluster_state(cluster_start);
	/* no tracked state means allocation already finished */
	u8 cs_state = (cs == nullptr ? QCOW2_ALLOC_DONE : cs->get_state());

	if (cs_state >= QCOW2_ALLOC_ZEROED) {
		/* cluster is ready: issue the io directly */
		*expected_op = io_op;
		return qcow2_queue_tgt_rw_fast(q, io_op, tag, offset, iod);
	}

	if (io_op == IORING_OP_WRITE) {
		if (cs_state == QCOW2_ALLOC_ZEROING) {
			/* someone else is zeroing; wait until it finishes */
			cs->add_waiter(tag);
			throw MetaUpdateException();
		}

		if (cs_state == QCOW2_ALLOC_STARTED) {
			/* zero the new cluster before the first write */
			int ret = qcow2_queue_tgt_zero_cluster(qs, q, tag,
					cluster_start);
			if (ret >= 0)
				cs->set_state(QCOW2_ALLOC_ZEROING);
			*expected_op = IORING_OP_FALLOCATE;
			return ret;
		}
		return 0;
	} else {
		/* read from a not-yet-zeroed cluster reads as zeroes */
		memset((void *)iod->addr, 0,
			iod->nr_sectors << 9);
		return 0;
	}
}
307*94c4a1e1SFrank Piva
308*94c4a1e1SFrank Piva /* return how many sqes queued */
qcow2_queue_tgt_io(const struct ublksrv_queue * q,unsigned io_op,int tag,u64 offset,u32 * exp_op,const struct ublksrv_io_desc * iod)309*94c4a1e1SFrank Piva static int qcow2_queue_tgt_io(const struct ublksrv_queue *q, unsigned io_op,
310*94c4a1e1SFrank Piva int tag, u64 offset, u32 *exp_op,
311*94c4a1e1SFrank Piva const struct ublksrv_io_desc *iod)
312*94c4a1e1SFrank Piva {
313*94c4a1e1SFrank Piva int ret;
314*94c4a1e1SFrank Piva
315*94c4a1e1SFrank Piva //we don't support discard yet
316*94c4a1e1SFrank Piva if (io_op == IORING_OP_FALLOCATE)
317*94c4a1e1SFrank Piva return -ENOTSUP;
318*94c4a1e1SFrank Piva
319*94c4a1e1SFrank Piva if (io_op == IORING_OP_FSYNC) {
320*94c4a1e1SFrank Piva ret = qcow2_queue_tgt_fsync(q, io_op, tag,
321*94c4a1e1SFrank Piva iod->nr_sectors << 9, offset);
322*94c4a1e1SFrank Piva *exp_op = io_op;
323*94c4a1e1SFrank Piva } else
324*94c4a1e1SFrank Piva ret = qcow2_queue_tgt_rw(q, io_op, tag, offset, iod, exp_op);
325*94c4a1e1SFrank Piva
326*94c4a1e1SFrank Piva return ret;
327*94c4a1e1SFrank Piva }
328*94c4a1e1SFrank Piva
l2_entry_read_as_zero(u64 entry)329*94c4a1e1SFrank Piva static inline bool l2_entry_read_as_zero(u64 entry)
330*94c4a1e1SFrank Piva {
331*94c4a1e1SFrank Piva if (!entry || (entry & 0x1))
332*94c4a1e1SFrank Piva return true;
333*94c4a1e1SFrank Piva return false;
334*94c4a1e1SFrank Piva }
335*94c4a1e1SFrank Piva
/*
 * Coroutine handling one ublk io end to end:
 *  1) map the virtual offset to a host offset via the cluster map,
 *     suspending and retrying while metadata io is in flight;
 *  2) queue the data io (possibly zeroing a fresh cluster first) and
 *     suspend until its cqe arrives;
 *  3) complete the ublk io with the final byte count or error.
 */
static co_io_job __qcow2_handle_io_async(const struct ublksrv_queue *q,
		const struct ublk_io_data *data, int tag)
{
	struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);
	Qcow2State *qs = queue_to_qcow2state(q);
	const struct ublksrv_io_desc *iod = data->iod;
	unsigned long start = iod->start_sector << 9;
	u64 mapped_start;
	qcow2_io_ctx_t ioc(tag, q->q_id);
	const struct io_uring_cqe *cqe;
	int ret = 0;
	unsigned int op = ublksrv_get_op(iod);
	bool wait;

	qcow2_io_log("%s: tag %d, ublk op %x virt %llx/%u\n",
			__func__, tag, op, start, (iod->nr_sectors << 9));

	qcow2_assert((start + (unsigned long)(iod->nr_sectors << 9)) <=
			qs->get_dev_size());
again:
	/* both exception types mean "metadata io pending, suspend and retry" */
	try {
		mapped_start = qs->cluster_map.map_cluster(ioc, start,
				op == UBLK_IO_OP_WRITE);
		wait = false;
	} catch (MetaIoException &meta_error) {
		wait = true;
	} catch (MetaUpdateException &meta_update_error) {
		wait = true;
	}

	if (wait) {
		/* resumed by qcow2_tgt_io_done() when the meta cqe arrives */
		co_await__suspend_always(tag);

		cqe = io->tgt_io_cqe;
		io->tgt_io_cqe = NULL;
		ret = qcow2_meta_io_done(q, cqe);
		if (ret == -EAGAIN)
			goto again;
		if (ret < 0)
			goto exit;
	}

	qcow2_io_log("%s: tag %d, ublk op %x virt %llx/%u to host %llx\n",
			__func__, tag, op, start, (iod->nr_sectors << 9),
			mapped_start);

	if (mapped_start == -1) {
		/* QCOW2_UNMAPPED: e.g. compressed/unsupported entry */
		ublk_err("%s: tag %d virt %lx op %d, unsupported format\n",
				__func__, tag, start, op);
		ret = -EIO;
	} else if (!mapped_start) {
		// write to unallocated cluster, so have to allocate first
		/*
		 * NOTE(review): l2_entry_read_as_zero(0) is always true,
		 * so for reads this branch always zero-fills — the extra
		 * check looks redundant here; confirm intent.
		 */
		if ((op == UBLK_IO_OP_READ) &&
				l2_entry_read_as_zero(mapped_start)) {
			ret = iod->nr_sectors << 9;
			memset((void *)iod->addr, 0, ret);
		} else {
			ublk_err("%s: tag %d virt %lx op %d map failed\n",
					__func__, tag, start, op);
			ret = -EIO;
		}
	} else {
		unsigned io_op = ublksrv_convert_cmd_op(iod);
		unsigned exp_op;

		/* strip the top flag bit to get the raw host offset */
		mapped_start &= ((1ULL << 63) - 1);

		qcow2_assert(mapped_start + (iod->nr_sectors << 9) <=
				qs->cluster_allocator.max_physical_size);
queue_io:
		//the only exception is from handling zeroing cluster
		try {
			ret = qcow2_queue_tgt_io(q, io_op, tag, mapped_start,
					&exp_op, iod);
			wait = false;
		} catch (MetaUpdateException &meta_error) {
			wait = true;
		}

		if (wait) {
			/* parked as waiter on a cluster being zeroed */
			co_await__suspend_always(tag);
			goto queue_io;
		}

		if (ret > 0) {
			u64 cluster_start = mapped_start &
				~((1ULL << qs->header.cluster_bits) - 1);

			/* one sqe queued; wait for its completion */
			co_await__suspend_always(tag);
			cqe = io->tgt_io_cqe;
			ret = cqe->res;
			if (ret == -EAGAIN) {
				qcow2_log("%s zeroing cluster IO eagain\n",
						__func__);
				//submit this write IO again
				if (user_data_to_op(cqe->user_data) == io_op)
					goto queue_io;

				//if the cluster zeroing IO isn't done, retry
				if (qs->cluster_allocator.
					alloc_cluster_reset(cluster_start))
					goto queue_io;
			}

			qcow2_io_log("%s: io done, tag %d res %d user_data %llx\n",
					__func__, tag, ret,
					cqe->user_data);
			if (exp_op != io_op) {
				/* zeroing finished; now submit the real rw */
				if (user_data_to_op(cqe->user_data) == IORING_OP_FALLOCATE)
					qs->cluster_allocator.alloc_cluster_zeroed(q,
							tag, cluster_start);
				goto queue_io;
			}
		} else if (ret == 0) {
			/* handled inline (e.g. zero-fill read): full length */
			ret = iod->nr_sectors << 9;
		}
	}
exit:
	if (ret < 0)
		ublk_err("%s io failed(%d %lx %u) ret %d\n", __func__,
				op, start, iod->nr_sectors, ret);
	qcow2_io_log("%s tag %d io complete(%d %llx %lu) ret %d\n", __func__,
			tag, op, start, iod->nr_sectors, ret);
	ublksrv_complete_io(q, tag, ret);
}
461*94c4a1e1SFrank Piva
qcow2_handle_io_async(const struct ublksrv_queue * q,const struct ublk_io_data * data)462*94c4a1e1SFrank Piva static int qcow2_handle_io_async(const struct ublksrv_queue *q,
463*94c4a1e1SFrank Piva const struct ublk_io_data *data)
464*94c4a1e1SFrank Piva {
465*94c4a1e1SFrank Piva struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);
466*94c4a1e1SFrank Piva
467*94c4a1e1SFrank Piva io->co = __qcow2_handle_io_async(q, data, data->tag);
468*94c4a1e1SFrank Piva return 0;
469*94c4a1e1SFrank Piva }
470*94c4a1e1SFrank Piva
qcow2_deinit_tgt(const struct ublksrv_dev * dev)471*94c4a1e1SFrank Piva static void qcow2_deinit_tgt(const struct ublksrv_dev *dev)
472*94c4a1e1SFrank Piva {
473*94c4a1e1SFrank Piva Qcow2State *qs = dev_to_qcow2state(dev);
474*94c4a1e1SFrank Piva
475*94c4a1e1SFrank Piva //now all io slots are available, just use the zero tag
476*94c4a1e1SFrank Piva qcow2_io_ctx_t ioc(0, 0);
477*94c4a1e1SFrank Piva
478*94c4a1e1SFrank Piva qs->dump_meta();
479*94c4a1e1SFrank Piva
480*94c4a1e1SFrank Piva delete qs;
481*94c4a1e1SFrank Piva }
482*94c4a1e1SFrank Piva
qcow2_tgt_io_done(const struct ublksrv_queue * q,const struct ublk_io_data * data,const struct io_uring_cqe * cqe)483*94c4a1e1SFrank Piva static void qcow2_tgt_io_done(const struct ublksrv_queue *q,
484*94c4a1e1SFrank Piva const struct ublk_io_data *data, const struct io_uring_cqe *cqe)
485*94c4a1e1SFrank Piva {
486*94c4a1e1SFrank Piva unsigned tag = user_data_to_tag(cqe->user_data);
487*94c4a1e1SFrank Piva
488*94c4a1e1SFrank Piva qcow2_io_log("%s: res %d qid %u tag %u, cmd_op %u\n",
489*94c4a1e1SFrank Piva __func__, cqe->res, q->q_id,
490*94c4a1e1SFrank Piva user_data_to_tag(cqe->user_data),
491*94c4a1e1SFrank Piva user_data_to_op(cqe->user_data));
492*94c4a1e1SFrank Piva //special tag is ignored, so far it is used in sending
493*94c4a1e1SFrank Piva //fsync during flushing meta
494*94c4a1e1SFrank Piva if (tag != 0xffff) {
495*94c4a1e1SFrank Piva struct ublk_io_tgt *io = __ublk_get_io_tgt_data(data);
496*94c4a1e1SFrank Piva io->tgt_io_cqe = cqe;
497*94c4a1e1SFrank Piva io->co.resume();
498*94c4a1e1SFrank Piva }
499*94c4a1e1SFrank Piva }
500*94c4a1e1SFrank Piva
qcow2_handle_io_bg(const struct ublksrv_queue * q,int nr_queued_io)501*94c4a1e1SFrank Piva static void qcow2_handle_io_bg(const struct ublksrv_queue *q, int nr_queued_io)
502*94c4a1e1SFrank Piva {
503*94c4a1e1SFrank Piva Qcow2State *qs = queue_to_qcow2state(q);
504*94c4a1e1SFrank Piva
505*94c4a1e1SFrank Piva ublk_dbg(UBLK_DBG_QCOW2_FLUSH | UBLK_DBG_QCOW2_META,
506*94c4a1e1SFrank Piva "%s %d, queued io %d\n", __func__, __LINE__, nr_queued_io);
507*94c4a1e1SFrank Piva qs->kill_slices(q);
508*94c4a1e1SFrank Piva again:
509*94c4a1e1SFrank Piva qs->meta_flushing.run_flush(q, nr_queued_io);
510*94c4a1e1SFrank Piva
511*94c4a1e1SFrank Piva if (!nr_queued_io && !qs->meta_flushing.is_flushing()) {
512*94c4a1e1SFrank Piva if (qs->has_dirty_slice())
513*94c4a1e1SFrank Piva goto again;
514*94c4a1e1SFrank Piva }
515*94c4a1e1SFrank Piva }
516*94c4a1e1SFrank Piva
qcow2_idle(const struct ublksrv_queue * q,bool enter)517*94c4a1e1SFrank Piva static void qcow2_idle(const struct ublksrv_queue *q, bool enter)
518*94c4a1e1SFrank Piva {
519*94c4a1e1SFrank Piva Qcow2State *qs = queue_to_qcow2state(q);
520*94c4a1e1SFrank Piva
521*94c4a1e1SFrank Piva if (!enter)
522*94c4a1e1SFrank Piva return;
523*94c4a1e1SFrank Piva
524*94c4a1e1SFrank Piva qs->shrink_cache();
525*94c4a1e1SFrank Piva }
526*94c4a1e1SFrank Piva
qcow2_init_queue(const struct ublksrv_queue * q,void ** queue_data_ptr)527*94c4a1e1SFrank Piva static int qcow2_init_queue(const struct ublksrv_queue *q,
528*94c4a1e1SFrank Piva void **queue_data_ptr)
529*94c4a1e1SFrank Piva {
530*94c4a1e1SFrank Piva Qcow2State *qs = dev_to_qcow2state(q->dev);
531*94c4a1e1SFrank Piva
532*94c4a1e1SFrank Piva *queue_data_ptr = (void *)qs;
533*94c4a1e1SFrank Piva
534*94c4a1e1SFrank Piva return 0;
535*94c4a1e1SFrank Piva }
536*94c4a1e1SFrank Piva
537*94c4a1e1SFrank Piva struct ublksrv_tgt_type qcow2_tgt_type = {
538*94c4a1e1SFrank Piva .handle_io_async = qcow2_handle_io_async,
539*94c4a1e1SFrank Piva .tgt_io_done = qcow2_tgt_io_done,
540*94c4a1e1SFrank Piva .handle_io_background = qcow2_handle_io_bg,
541*94c4a1e1SFrank Piva .usage_for_add = qcow2_usage_for_add,
542*94c4a1e1SFrank Piva .init_tgt = qcow2_init_tgt,
543*94c4a1e1SFrank Piva .deinit_tgt = qcow2_deinit_tgt,
544*94c4a1e1SFrank Piva .idle_fn = qcow2_idle,
545*94c4a1e1SFrank Piva .type = UBLKSRV_TGT_TYPE_QCOW2,
546*94c4a1e1SFrank Piva .name = "qcow2",
547*94c4a1e1SFrank Piva .recovery_tgt = qcow2_recovery_tgt,
548*94c4a1e1SFrank Piva .init_queue = qcow2_init_queue,
549*94c4a1e1SFrank Piva };
550*94c4a1e1SFrank Piva
551*94c4a1e1SFrank Piva static void tgt_qcow2_init() __attribute__((constructor));
552*94c4a1e1SFrank Piva
tgt_qcow2_init(void)553*94c4a1e1SFrank Piva static void tgt_qcow2_init(void)
554*94c4a1e1SFrank Piva {
555*94c4a1e1SFrank Piva ublksrv_register_tgt_type(&qcow2_tgt_type);
556*94c4a1e1SFrank Piva }
557