1*94c4a1e1SFrank Piva // SPDX-License-Identifier: GPL-2.0
2*94c4a1e1SFrank Piva #include <cassert>
3*94c4a1e1SFrank Piva
4*94c4a1e1SFrank Piva #include "qcow2.h"
5*94c4a1e1SFrank Piva #include "ublksrv_tgt.h"
6*94c4a1e1SFrank Piva
7*94c4a1e1SFrank Piva
8*94c4a1e1SFrank Piva // refcnt is for slice only, and initialize it as two, one is for submission
9*94c4a1e1SFrank Piva // side, another is for free side. This way guarantees that the returned slice
10*94c4a1e1SFrank Piva // from alloc_slice is always valid
Qcow2Meta(Qcow2Header & h,u64 off,u32 sz,const char * name,u32 f)11*94c4a1e1SFrank Piva Qcow2Meta::Qcow2Meta(Qcow2Header &h, u64 off, u32 sz, const char *name, u32 f):
12*94c4a1e1SFrank Piva header(h), offset(off), buf_sz(sz), flags(f), refcnt(2)
13*94c4a1e1SFrank Piva {
14*94c4a1e1SFrank Piva //used for implementing slice's ->reset() only
15*94c4a1e1SFrank Piva if (f & QCOW2_META_DONT_ALLOC_BUF)
16*94c4a1e1SFrank Piva return;
17*94c4a1e1SFrank Piva
18*94c4a1e1SFrank Piva if (posix_memalign((void **)&addr, getpagesize(), sz))
19*94c4a1e1SFrank Piva ublk_err( "allocate memory %d bytes failed, %s\n",
20*94c4a1e1SFrank Piva sz, name);
21*94c4a1e1SFrank Piva #ifdef DEBUG_QCOW2_META_OBJ
22*94c4a1e1SFrank Piva id = name;
23*94c4a1e1SFrank Piva qcow2_log("%s: constructed, obj %p, buf size %d off %lx flags %x\n",
24*94c4a1e1SFrank Piva name, this, sz, off, flags);
25*94c4a1e1SFrank Piva #endif
26*94c4a1e1SFrank Piva }
27*94c4a1e1SFrank Piva
show(const char * func,int line)28*94c4a1e1SFrank Piva void Qcow2Meta::show(const char *func, int line)
29*94c4a1e1SFrank Piva {
30*94c4a1e1SFrank Piva #ifdef DEBUG_QCOW2_META_OBJ
31*94c4a1e1SFrank Piva qcow2_log("%s:%d id %s obj %p flags %x off %lx ref %d\n",
32*94c4a1e1SFrank Piva func, line, id, this, flags, offset, refcnt);
33*94c4a1e1SFrank Piva #else
34*94c4a1e1SFrank Piva qcow2_log("%s:%d obj %p flags %x off %lx ref %d\n",
35*94c4a1e1SFrank Piva func, line, this, flags, offset, refcnt);
36*94c4a1e1SFrank Piva #endif
37*94c4a1e1SFrank Piva }
38*94c4a1e1SFrank Piva
~Qcow2Meta()39*94c4a1e1SFrank Piva Qcow2Meta::~Qcow2Meta()
40*94c4a1e1SFrank Piva {
41*94c4a1e1SFrank Piva #ifdef DEBUG_QCOW2_META_OBJ
42*94c4a1e1SFrank Piva qcow2_log("%s: destructed, obj %p flags %x off %lx ref %d\n",
43*94c4a1e1SFrank Piva id, this, flags, offset, refcnt);
44*94c4a1e1SFrank Piva #endif
45*94c4a1e1SFrank Piva if (flags & QCOW2_META_DONT_ALLOC_BUF)
46*94c4a1e1SFrank Piva return;
47*94c4a1e1SFrank Piva
48*94c4a1e1SFrank Piva if (!is_top_meta() && (get_dirty(-1) || is_flushing() ||
49*94c4a1e1SFrank Piva (!get_update() && !get_evicted()))) {
50*94c4a1e1SFrank Piva qcow2_log("BUG %s: obj %p flags %x off %lx\n",
51*94c4a1e1SFrank Piva __func__, this, flags, offset);
52*94c4a1e1SFrank Piva qcow2_assert(0);
53*94c4a1e1SFrank Piva }
54*94c4a1e1SFrank Piva free(addr);
55*94c4a1e1SFrank Piva }
56*94c4a1e1SFrank Piva
load(Qcow2State & qs,const qcow2_io_ctx_t & ioc,u32 len,bool sync)57*94c4a1e1SFrank Piva int Qcow2Meta::load(Qcow2State &qs, const qcow2_io_ctx_t &ioc, u32 len, bool sync)
58*94c4a1e1SFrank Piva {
59*94c4a1e1SFrank Piva int fd;
60*94c4a1e1SFrank Piva
61*94c4a1e1SFrank Piva if (addr == NULL)
62*94c4a1e1SFrank Piva return -EINVAL;
63*94c4a1e1SFrank Piva if (len > buf_sz) {
64*94c4a1e1SFrank Piva ublk_err( "%s %s: load too much %d(%d) \n",
65*94c4a1e1SFrank Piva __func__, typeid(*this).name(), len, buf_sz);
66*94c4a1e1SFrank Piva return -EINVAL;
67*94c4a1e1SFrank Piva }
68*94c4a1e1SFrank Piva if (!sync)
69*94c4a1e1SFrank Piva return -EOPNOTSUPP;
70*94c4a1e1SFrank Piva
71*94c4a1e1SFrank Piva //qcow2_log("%s: read %s offset %llx len %lu \n", __func__,
72*94c4a1e1SFrank Piva // typeid(*this).name(), offset, len);
73*94c4a1e1SFrank Piva fd = qs.img.fd;
74*94c4a1e1SFrank Piva lseek(fd, offset, SEEK_SET);
75*94c4a1e1SFrank Piva data_len = read(fd, addr, len);
76*94c4a1e1SFrank Piva if (data_len != len)
77*94c4a1e1SFrank Piva qcow2_log("%s: read %u(%u)\n", __func__, len, data_len);
78*94c4a1e1SFrank Piva if (data_len > 0)
79*94c4a1e1SFrank Piva flags |= QCOW2_META_UPDATE;
80*94c4a1e1SFrank Piva return data_len;
81*94c4a1e1SFrank Piva }
82*94c4a1e1SFrank Piva
flush(Qcow2State & qs,const qcow2_io_ctx_t & ioc,u64 off,u32 len)83*94c4a1e1SFrank Piva int Qcow2Meta::flush(Qcow2State &qs, const qcow2_io_ctx_t &ioc, u64 off,
84*94c4a1e1SFrank Piva u32 len)
85*94c4a1e1SFrank Piva {
86*94c4a1e1SFrank Piva int fd = qs.img.fd;
87*94c4a1e1SFrank Piva int ret;
88*94c4a1e1SFrank Piva
89*94c4a1e1SFrank Piva if (!(flags & QCOW2_META_DIRTY))
90*94c4a1e1SFrank Piva return 0;
91*94c4a1e1SFrank Piva
92*94c4a1e1SFrank Piva if (!(flags & QCOW2_META_UPDATE))
93*94c4a1e1SFrank Piva ublk_err( "%s %s: buf isn't update\n", __func__,
94*94c4a1e1SFrank Piva typeid(*this).name());
95*94c4a1e1SFrank Piva
96*94c4a1e1SFrank Piva //qcow2_log("%s: write %s offset %llx len %lu \n", __func__,
97*94c4a1e1SFrank Piva // typeid(*this).name(), offset, buf_sz);
98*94c4a1e1SFrank Piva lseek(fd, off, SEEK_SET);
99*94c4a1e1SFrank Piva ret = write(fd, addr, len);
100*94c4a1e1SFrank Piva if (len != ret)
101*94c4a1e1SFrank Piva qcow2_log("%s: write %u(%u)\n", __func__, len, ret);
102*94c4a1e1SFrank Piva if (ret > 0)
103*94c4a1e1SFrank Piva flags &= ~QCOW2_META_DIRTY;
104*94c4a1e1SFrank Piva
105*94c4a1e1SFrank Piva return len;
106*94c4a1e1SFrank Piva }
107*94c4a1e1SFrank Piva
zero_buf()108*94c4a1e1SFrank Piva void Qcow2Meta::zero_buf() {
109*94c4a1e1SFrank Piva memset((void *)addr, 0, buf_sz);
110*94c4a1e1SFrank Piva }
111*94c4a1e1SFrank Piva
112*94c4a1e1SFrank Piva // Base class is constructed first, then follows member class/objects,
113*94c4a1e1SFrank Piva // and member classes are done in the order of their declaration,
114*94c4a1e1SFrank Piva // so here __a can be setup correctly.
Qcow2HeaderExtFeatureNameTable(char * addr,u64 offset)115*94c4a1e1SFrank Piva Qcow2HeaderExtFeatureNameTable::Qcow2HeaderExtFeatureNameTable(
116*94c4a1e1SFrank Piva char *addr, u64 offset): Qcow2HeaderExt(addr, offset),
117*94c4a1e1SFrank Piva __a(len / sizeof(struct feature_entry))
118*94c4a1e1SFrank Piva {
119*94c4a1e1SFrank Piva unsigned off = offset;
120*94c4a1e1SFrank Piva
121*94c4a1e1SFrank Piva for (int i = 0; i < __a.size(); i++) {
122*94c4a1e1SFrank Piva __a[i].feature_type = *(addr + off + 8);
123*94c4a1e1SFrank Piva __a[i].bit_num = *(addr + off + 9);
124*94c4a1e1SFrank Piva strncpy(__a[i].feature_name, addr + off + 10, 46);
125*94c4a1e1SFrank Piva off += 48;
126*94c4a1e1SFrank Piva }
127*94c4a1e1SFrank Piva }
128*94c4a1e1SFrank Piva
dump() const129*94c4a1e1SFrank Piva void Qcow2HeaderExtFeatureNameTable::dump() const
130*94c4a1e1SFrank Piva {
131*94c4a1e1SFrank Piva Qcow2HeaderExt::dump();
132*94c4a1e1SFrank Piva
133*94c4a1e1SFrank Piva for (int i = 0; i < __a.size(); i++)
134*94c4a1e1SFrank Piva qcow2_log("\t %d: type %x bit_num %u name %s\n",
135*94c4a1e1SFrank Piva i, __a[i].feature_type, __a[i].bit_num,
136*94c4a1e1SFrank Piva __a[i].feature_name);
137*94c4a1e1SFrank Piva }
138*94c4a1e1SFrank Piva
Qcow2Header(Qcow2State & state)139*94c4a1e1SFrank Piva Qcow2Header::Qcow2Header(Qcow2State &state): Qcow2Meta(*this, 0, 4096,
140*94c4a1e1SFrank Piva typeid(this).name(), 0), magic(0), version(0), cluster_bits(0),
141*94c4a1e1SFrank Piva refcount_order(0), qs(state)
142*94c4a1e1SFrank Piva {
143*94c4a1e1SFrank Piva backingfile_format_name = NULL;
144*94c4a1e1SFrank Piva feature_name_table = NULL;
145*94c4a1e1SFrank Piva enc_header_pointer = NULL;
146*94c4a1e1SFrank Piva bitmaps = NULL;
147*94c4a1e1SFrank Piva ext_data_file_name = NULL;
148*94c4a1e1SFrank Piva
149*94c4a1e1SFrank Piva load(state, 0, buf_sz, true);
150*94c4a1e1SFrank Piva }
151*94c4a1e1SFrank Piva
flush(Qcow2State & qs,const qcow2_io_ctx_t & ioc,u64 off,u32 len)152*94c4a1e1SFrank Piva int Qcow2Header::flush(Qcow2State &qs, const qcow2_io_ctx_t &ioc, u64 off,
153*94c4a1e1SFrank Piva u32 len)
154*94c4a1e1SFrank Piva {
155*94c4a1e1SFrank Piva return Qcow2Meta::flush(qs, ioc, off, len);
156*94c4a1e1SFrank Piva }
157*94c4a1e1SFrank Piva
~Qcow2Header()158*94c4a1e1SFrank Piva Qcow2Header::~Qcow2Header()
159*94c4a1e1SFrank Piva {
160*94c4a1e1SFrank Piva delete backingfile_format_name;
161*94c4a1e1SFrank Piva delete feature_name_table;
162*94c4a1e1SFrank Piva delete enc_header_pointer;
163*94c4a1e1SFrank Piva delete bitmaps;
164*94c4a1e1SFrank Piva delete ext_data_file_name;
165*94c4a1e1SFrank Piva }
166*94c4a1e1SFrank Piva
dump_ext() const167*94c4a1e1SFrank Piva void Qcow2Header::dump_ext() const
168*94c4a1e1SFrank Piva {
169*94c4a1e1SFrank Piva if (backingfile_format_name)
170*94c4a1e1SFrank Piva backingfile_format_name->dump();
171*94c4a1e1SFrank Piva
172*94c4a1e1SFrank Piva if (ext_data_file_name)
173*94c4a1e1SFrank Piva ext_data_file_name->dump();
174*94c4a1e1SFrank Piva
175*94c4a1e1SFrank Piva if (feature_name_table)
176*94c4a1e1SFrank Piva feature_name_table->dump();
177*94c4a1e1SFrank Piva
178*94c4a1e1SFrank Piva if (bitmaps)
179*94c4a1e1SFrank Piva bitmaps->dump();
180*94c4a1e1SFrank Piva
181*94c4a1e1SFrank Piva if (enc_header_pointer)
182*94c4a1e1SFrank Piva enc_header_pointer->dump();
183*94c4a1e1SFrank Piva }
184*94c4a1e1SFrank Piva
185*94c4a1e1SFrank Piva /*
186*94c4a1e1SFrank Piva * populate header extensions
187*94c4a1e1SFrank Piva *
188*94c4a1e1SFrank Piva * The header may take more than 4k, which should be decided by
189*94c4a1e1SFrank Piva * backing_file_offset & backing_file_size __or__ populate
190*94c4a1e1SFrank Piva * header extensions.
191*94c4a1e1SFrank Piva */
populate()192*94c4a1e1SFrank Piva int Qcow2Header::populate()
193*94c4a1e1SFrank Piva {
194*94c4a1e1SFrank Piva char *buf = (char *)addr;
195*94c4a1e1SFrank Piva u64 start = (get_header_length() + 7) & ~0x7ULL;
196*94c4a1e1SFrank Piva u32 *p_magic = const_cast<u32 *> (&magic);
197*94c4a1e1SFrank Piva u32 *p_version = const_cast<u32 *> (&version);
198*94c4a1e1SFrank Piva u32 *p_cluster_bits = const_cast<u32 *> (&cluster_bits);
199*94c4a1e1SFrank Piva u32 *p_refcount_order = const_cast<u32 *> (&refcount_order);
200*94c4a1e1SFrank Piva
201*94c4a1e1SFrank Piva *p_magic = get_magic();
202*94c4a1e1SFrank Piva *p_version = get_version();
203*94c4a1e1SFrank Piva *p_cluster_bits = get_cluster_bits();
204*94c4a1e1SFrank Piva *p_refcount_order = get_refcount_order();
205*94c4a1e1SFrank Piva
206*94c4a1e1SFrank Piva if (version == 2)
207*94c4a1e1SFrank Piva goto exit;
208*94c4a1e1SFrank Piva
209*94c4a1e1SFrank Piva //todo: populate extensions
210*94c4a1e1SFrank Piva while (true) {
211*94c4a1e1SFrank Piva Qcow2HeaderExt ext(buf, start);
212*94c4a1e1SFrank Piva
213*94c4a1e1SFrank Piva switch (ext.type) {
214*94c4a1e1SFrank Piva case QCOW2_EXT_MAGIC_END:
215*94c4a1e1SFrank Piva goto exit;
216*94c4a1e1SFrank Piva case QCOW2_EXT_MAGIC_BACKING_FORMAT:
217*94c4a1e1SFrank Piva this->backingfile_format_name =
218*94c4a1e1SFrank Piva new Qcow2HeaderExtString(buf, start);
219*94c4a1e1SFrank Piva break;
220*94c4a1e1SFrank Piva case QCOW2_EXT_MAGIC_FEATURE_TABLE:
221*94c4a1e1SFrank Piva this->feature_name_table =
222*94c4a1e1SFrank Piva new Qcow2HeaderExtFeatureNameTable(
223*94c4a1e1SFrank Piva buf, start);
224*94c4a1e1SFrank Piva break;
225*94c4a1e1SFrank Piva case QCOW2_EXT_MAGIC_CRYPTO_HEADER:
226*94c4a1e1SFrank Piva this->enc_header_pointer =
227*94c4a1e1SFrank Piva new Qcow2HeaderExtEncHeader(buf, start);
228*94c4a1e1SFrank Piva break;
229*94c4a1e1SFrank Piva case QCOW2_EXT_MAGIC_BITMAPS:
230*94c4a1e1SFrank Piva this->bitmaps =
231*94c4a1e1SFrank Piva new Qcow2HeaderExtBitmaps(buf, start);
232*94c4a1e1SFrank Piva break;
233*94c4a1e1SFrank Piva case QCOW2_EXT_MAGIC_DATA_FILE:
234*94c4a1e1SFrank Piva this->ext_data_file_name =
235*94c4a1e1SFrank Piva new Qcow2HeaderExtString(buf, start);
236*94c4a1e1SFrank Piva break;
237*94c4a1e1SFrank Piva };
238*94c4a1e1SFrank Piva start += 8 + (ext.len + 7) & ~0x7ULL;
239*94c4a1e1SFrank Piva }
240*94c4a1e1SFrank Piva exit:
241*94c4a1e1SFrank Piva return 0;
242*94c4a1e1SFrank Piva }
243*94c4a1e1SFrank Piva
load(Qcow2State & qs,const qcow2_io_ctx_t & ioc,u32 len,bool sync)244*94c4a1e1SFrank Piva int Qcow2Header::load(Qcow2State &qs, const qcow2_io_ctx_t &ioc, u32 len, bool sync)
245*94c4a1e1SFrank Piva {
246*94c4a1e1SFrank Piva int ret;
247*94c4a1e1SFrank Piva
248*94c4a1e1SFrank Piva ret = Qcow2Meta::load(qs, ioc, len, sync);
249*94c4a1e1SFrank Piva if (ret <= 0)
250*94c4a1e1SFrank Piva goto fail;
251*94c4a1e1SFrank Piva
252*94c4a1e1SFrank Piva ret = populate();
253*94c4a1e1SFrank Piva return ret;
254*94c4a1e1SFrank Piva fail:
255*94c4a1e1SFrank Piva ublk_err( "%s: load failed %d", __func__, ret);
256*94c4a1e1SFrank Piva return ret;
257*94c4a1e1SFrank Piva }
258*94c4a1e1SFrank Piva
operator <<(std::ostream & os,const Qcow2Header & h)259*94c4a1e1SFrank Piva std::ostream & operator<<(std::ostream &os, const Qcow2Header &h)
260*94c4a1e1SFrank Piva {
261*94c4a1e1SFrank Piva char buf[256];
262*94c4a1e1SFrank Piva
263*94c4a1e1SFrank Piva sprintf(buf, "magic: %x", h.magic);
264*94c4a1e1SFrank Piva std::cout << std::string(buf) << std::endl;
265*94c4a1e1SFrank Piva qcow2_log("%s", buf);
266*94c4a1e1SFrank Piva
267*94c4a1e1SFrank Piva sprintf(buf, "version: %x\n", h.version);
268*94c4a1e1SFrank Piva std::cout << std::string(buf) << std::endl;
269*94c4a1e1SFrank Piva qcow2_log("%s", buf);
270*94c4a1e1SFrank Piva
271*94c4a1e1SFrank Piva sprintf(buf, "cluster_bits: %x\n", h.cluster_bits);
272*94c4a1e1SFrank Piva std::cout << std::string(buf) << std::endl;
273*94c4a1e1SFrank Piva qcow2_log("%s", buf);
274*94c4a1e1SFrank Piva
275*94c4a1e1SFrank Piva sprintf(buf, "refcount_order: %x\n", h.refcount_order);
276*94c4a1e1SFrank Piva std::cout << std::string(buf) << std::endl;
277*94c4a1e1SFrank Piva qcow2_log("%s", buf);
278*94c4a1e1SFrank Piva
279*94c4a1e1SFrank Piva return os;
280*94c4a1e1SFrank Piva }
281*94c4a1e1SFrank Piva
Qcow2MappingMeta(Qcow2State & qs,u64 off,u32 buf_sz,const char * cls_name,u32 f)282*94c4a1e1SFrank Piva Qcow2MappingMeta::Qcow2MappingMeta(Qcow2State &qs, u64 off, u32 buf_sz,
283*94c4a1e1SFrank Piva const char *cls_name, u32 f):
284*94c4a1e1SFrank Piva Qcow2Meta(qs.header, off, buf_sz, cls_name, f)
285*94c4a1e1SFrank Piva {
286*94c4a1e1SFrank Piva //default each entry is 64bits(8bytes) except for:
287*94c4a1e1SFrank Piva // extended l2 entry is 128bit, refcount blk has refcount_order
288*94c4a1e1SFrank Piva entry_bits_order = 6;
289*94c4a1e1SFrank Piva next_free_idx = -1;
290*94c4a1e1SFrank Piva }
291*94c4a1e1SFrank Piva
292*94c4a1e1SFrank Piva /*
293*94c4a1e1SFrank Piva * __flush() is just one worker, state check/update is done before calling
294*94c4a1e1SFrank Piva * __flush()
295*94c4a1e1SFrank Piva */
__flush(Qcow2State & qs,const qcow2_io_ctx_t & ioc,u64 off,u32 len,bool run_fsync)296*94c4a1e1SFrank Piva int Qcow2MappingMeta::__flush(Qcow2State &qs, const qcow2_io_ctx_t &ioc,
297*94c4a1e1SFrank Piva u64 off, u32 len, bool run_fsync)
298*94c4a1e1SFrank Piva {
299*94c4a1e1SFrank Piva int fd = qs.img.fd;
300*94c4a1e1SFrank Piva u32 qid = ioc.get_qid();
301*94c4a1e1SFrank Piva u32 tag = ioc.get_tag();
302*94c4a1e1SFrank Piva const struct ublksrv_queue *q = ublksrv_get_queue(qs.dev, qid);
303*94c4a1e1SFrank Piva struct io_uring_sqe *sqe, *sqe2;
304*94c4a1e1SFrank Piva unsigned mio_id;
305*94c4a1e1SFrank Piva
306*94c4a1e1SFrank Piva qcow2_assert(flags & QCOW2_META_DIRTY);
307*94c4a1e1SFrank Piva
308*94c4a1e1SFrank Piva if (!(flags & QCOW2_META_UPDATE))
309*94c4a1e1SFrank Piva ublk_err( "%s %s: buf isn't update\n", __func__,
310*94c4a1e1SFrank Piva typeid(*this).name());
311*94c4a1e1SFrank Piva
312*94c4a1e1SFrank Piva if (off < offset || off >= offset + buf_sz) {
313*94c4a1e1SFrank Piva ublk_err( "%s %s: offset %" PRIx64 " is wrong\n", __func__,
314*94c4a1e1SFrank Piva typeid(*this).name(), offset);
315*94c4a1e1SFrank Piva return -EINVAL;
316*94c4a1e1SFrank Piva }
317*94c4a1e1SFrank Piva
318*94c4a1e1SFrank Piva if (len > offset + buf_sz - off) {
319*94c4a1e1SFrank Piva ublk_err( "%s %s: len %x is wrong\n", __func__,
320*94c4a1e1SFrank Piva typeid(*this).name(), len);
321*94c4a1e1SFrank Piva return -EINVAL;
322*94c4a1e1SFrank Piva }
323*94c4a1e1SFrank Piva
324*94c4a1e1SFrank Piva sqe = io_uring_get_sqe(q->ring_ptr);
325*94c4a1e1SFrank Piva if (!sqe) {
326*94c4a1e1SFrank Piva ublk_err( "%s %s: not get sqe allocated",
327*94c4a1e1SFrank Piva __func__, typeid(*this).name());
328*94c4a1e1SFrank Piva return -ENOMEM;
329*94c4a1e1SFrank Piva }
330*94c4a1e1SFrank Piva
331*94c4a1e1SFrank Piva if (run_fsync) {
332*94c4a1e1SFrank Piva sqe2 = io_uring_get_sqe(q->ring_ptr);
333*94c4a1e1SFrank Piva if (!sqe2) {
334*94c4a1e1SFrank Piva ublk_err( "%s %s: not get sqe2 allocated",
335*94c4a1e1SFrank Piva __func__, typeid(*this).name());
336*94c4a1e1SFrank Piva return -ENOMEM;
337*94c4a1e1SFrank Piva }
338*94c4a1e1SFrank Piva io_uring_prep_fsync(sqe2, fd, IORING_FSYNC_DATASYNC);
339*94c4a1e1SFrank Piva sqe2->user_data = build_user_data(0xffff, IORING_OP_FSYNC, 0, 1);
340*94c4a1e1SFrank Piva sqe2->flags |= IOSQE_IO_LINK;
341*94c4a1e1SFrank Piva }
342*94c4a1e1SFrank Piva
343*94c4a1e1SFrank Piva mio_id = qs.add_meta_io(qid, this);
344*94c4a1e1SFrank Piva
345*94c4a1e1SFrank Piva io_uring_prep_write(sqe, fd, (void *)((u64)addr + (off - offset)),
346*94c4a1e1SFrank Piva len, off);
347*94c4a1e1SFrank Piva sqe->user_data = build_user_data(tag, IORING_OP_WRITE, mio_id + 1, 1);
348*94c4a1e1SFrank Piva ublk_dbg(UBLK_DBG_QCOW2_META, "%s %s: flushing %p tag %d off %lx sz %d flags %x refcnt %d\n",
349*94c4a1e1SFrank Piva __func__, typeid(*this).name(), this, tag, off,
350*94c4a1e1SFrank Piva len, flags, read_ref());
351*94c4a1e1SFrank Piva return 1;
352*94c4a1e1SFrank Piva }
353*94c4a1e1SFrank Piva
io_done(Qcow2State & qs,const struct ublksrv_queue * q,const struct io_uring_cqe * cqe)354*94c4a1e1SFrank Piva void Qcow2MappingMeta::io_done(Qcow2State &qs, const struct ublksrv_queue *q,
355*94c4a1e1SFrank Piva const struct io_uring_cqe *cqe)
356*94c4a1e1SFrank Piva {
357*94c4a1e1SFrank Piva u32 tag = user_data_to_tag(cqe->user_data);
358*94c4a1e1SFrank Piva u32 meta_id = user_data_to_tgt_data(cqe->user_data) - 1;
359*94c4a1e1SFrank Piva u32 op = user_data_to_op(cqe->user_data);
360*94c4a1e1SFrank Piva
361*94c4a1e1SFrank Piva qs.del_meta_io(q->q_id, meta_id);
362*94c4a1e1SFrank Piva
363*94c4a1e1SFrank Piva //zero my cluster needn't to wakeup events on me
364*94c4a1e1SFrank Piva if (op != IORING_OP_FALLOCATE)
365*94c4a1e1SFrank Piva wakeup_all(q, tag);
366*94c4a1e1SFrank Piva }
367*94c4a1e1SFrank Piva
Qcow2TopTable(Qcow2State & qs,u64 off,u32 buf_sz,const char * cls_name,u32 f)368*94c4a1e1SFrank Piva Qcow2TopTable::Qcow2TopTable(Qcow2State &qs, u64 off, u32 buf_sz,
369*94c4a1e1SFrank Piva const char *cls_name, u32 f):
370*94c4a1e1SFrank Piva Qcow2MappingMeta(qs, off, buf_sz, cls_name, f),
371*94c4a1e1SFrank Piva min_bs_bits(qs.min_bs_bits),
372*94c4a1e1SFrank Piva dirty(qs.get_l1_table_max_size() >> qs.min_bs_bits)
373*94c4a1e1SFrank Piva {
374*94c4a1e1SFrank Piva ublk_dbg(UBLK_DBG_QCOW2_META_L1, "%s: %s dirty size %zd %u/%u\n",
375*94c4a1e1SFrank Piva __func__,
376*94c4a1e1SFrank Piva cls_name, dirty.size(),
377*94c4a1e1SFrank Piva qs.get_l1_table_max_size(),qs.min_bs_bits);
378*94c4a1e1SFrank Piva for (int i = 0; i < dirty.size(); i++)
379*94c4a1e1SFrank Piva dirty[i] = false;
380*94c4a1e1SFrank Piva }
381*94c4a1e1SFrank Piva
prep_flush(const qcow2_io_ctx_t & ioc,u32 blk_idx)382*94c4a1e1SFrank Piva bool Qcow2TopTable::prep_flush(const qcow2_io_ctx_t &ioc, u32 blk_idx)
383*94c4a1e1SFrank Piva {
384*94c4a1e1SFrank Piva if (!(flags & QCOW2_META_DIRTY))
385*94c4a1e1SFrank Piva return false;
386*94c4a1e1SFrank Piva
387*94c4a1e1SFrank Piva //so far, just allow one in-progress unit for l1/refcount table
388*94c4a1e1SFrank Piva if (flags & QCOW2_META_FLUSHING)
389*94c4a1e1SFrank Piva return false;
390*94c4a1e1SFrank Piva
391*94c4a1e1SFrank Piva flags |= QCOW2_META_FLUSHING;
392*94c4a1e1SFrank Piva return true;
393*94c4a1e1SFrank Piva }
394*94c4a1e1SFrank Piva
unprep_flush(u32 blk_idx)395*94c4a1e1SFrank Piva void Qcow2TopTable::unprep_flush(u32 blk_idx) {
396*94c4a1e1SFrank Piva flags &= ~QCOW2_META_FLUSHING;
397*94c4a1e1SFrank Piva }
398*94c4a1e1SFrank Piva
io_done(Qcow2State & qs,const struct ublksrv_queue * q,const struct io_uring_cqe * cqe)399*94c4a1e1SFrank Piva void Qcow2TopTable::io_done(Qcow2State &qs, const struct ublksrv_queue *q,
400*94c4a1e1SFrank Piva const struct io_uring_cqe *cqe)
401*94c4a1e1SFrank Piva {
402*94c4a1e1SFrank Piva u32 op = user_data_to_op(cqe->user_data);
403*94c4a1e1SFrank Piva
404*94c4a1e1SFrank Piva //only for write l1 or refcount table
405*94c4a1e1SFrank Piva qcow2_assert(op == IORING_OP_WRITE);
406*94c4a1e1SFrank Piva
407*94c4a1e1SFrank Piva unprep_flush(get_flush_blk_idx());
408*94c4a1e1SFrank Piva
409*94c4a1e1SFrank Piva if (cqe->res < 0)
410*94c4a1e1SFrank Piva return;
411*94c4a1e1SFrank Piva
412*94c4a1e1SFrank Piva set_blk_dirty(get_flush_blk_idx(), false);
413*94c4a1e1SFrank Piva
414*94c4a1e1SFrank Piva Qcow2MappingMeta::io_done(qs, q, cqe);
415*94c4a1e1SFrank Piva }
416*94c4a1e1SFrank Piva
flush(Qcow2State & qs,const qcow2_io_ctx_t & ioc,u64 off,u32 len)417*94c4a1e1SFrank Piva int Qcow2TopTable::flush(Qcow2State &qs, const qcow2_io_ctx_t &ioc,
418*94c4a1e1SFrank Piva u64 off, u32 len)
419*94c4a1e1SFrank Piva {
420*94c4a1e1SFrank Piva int blk_idx = (off - offset) >> min_bs_bits;
421*94c4a1e1SFrank Piva int ret;
422*94c4a1e1SFrank Piva
423*94c4a1e1SFrank Piva qcow2_assert(len == 512 && blk_idx < dirty.size());
424*94c4a1e1SFrank Piva
425*94c4a1e1SFrank Piva if (!prep_flush(ioc, blk_idx))
426*94c4a1e1SFrank Piva return 0;
427*94c4a1e1SFrank Piva
428*94c4a1e1SFrank Piva if (!get_blk_dirty(blk_idx)) {
429*94c4a1e1SFrank Piva ret = 0;
430*94c4a1e1SFrank Piva goto exit;
431*94c4a1e1SFrank Piva }
432*94c4a1e1SFrank Piva
433*94c4a1e1SFrank Piva set_flush_blk_idx(blk_idx);
434*94c4a1e1SFrank Piva
435*94c4a1e1SFrank Piva //need to run fsync before writting l1/refcount table, so
436*94c4a1e1SFrank Piva //that write order between top and l2/refcount blk is respected
437*94c4a1e1SFrank Piva ret = Qcow2MappingMeta::__flush(qs, ioc, off, len, true);
438*94c4a1e1SFrank Piva exit:
439*94c4a1e1SFrank Piva if (ret <= 0)
440*94c4a1e1SFrank Piva unprep_flush(blk_idx);
441*94c4a1e1SFrank Piva return ret;
442*94c4a1e1SFrank Piva }
443*94c4a1e1SFrank Piva
has_dirty_slices(Qcow2State & qs,int idx)444*94c4a1e1SFrank Piva bool Qcow2TopTable::has_dirty_slices(Qcow2State &qs, int idx)
445*94c4a1e1SFrank Piva {
446*94c4a1e1SFrank Piva u64 entry = get_entry(idx);
447*94c4a1e1SFrank Piva u64 start, end, step, offset;
448*94c4a1e1SFrank Piva
449*94c4a1e1SFrank Piva if (!entry)
450*94c4a1e1SFrank Piva return false;
451*94c4a1e1SFrank Piva
452*94c4a1e1SFrank Piva if (is_mapping_meta())
453*94c4a1e1SFrank Piva step = 1ULL << (QCOW2_PARA::L2_TABLE_SLICE_BITS - 3 +
454*94c4a1e1SFrank Piva qs.header.cluster_bits);
455*94c4a1e1SFrank Piva else
456*94c4a1e1SFrank Piva step = 1ULL << (QCOW2_PARA::REFCOUNT_BLK_SLICE_BITS - 3 +
457*94c4a1e1SFrank Piva qs.header.cluster_bits);
458*94c4a1e1SFrank Piva
459*94c4a1e1SFrank Piva start = ((u64)idx) << single_entry_order();
460*94c4a1e1SFrank Piva end = start + (1ULL << single_entry_order());
461*94c4a1e1SFrank Piva for (offset = start; offset < end; offset += step) {
462*94c4a1e1SFrank Piva Qcow2SliceMeta *t;
463*94c4a1e1SFrank Piva
464*94c4a1e1SFrank Piva if (is_mapping_meta())
465*94c4a1e1SFrank Piva t = qs.cluster_map.__find_slice(offset);
466*94c4a1e1SFrank Piva else
467*94c4a1e1SFrank Piva t = qs.cluster_allocator.__find_slice(offset);
468*94c4a1e1SFrank Piva
469*94c4a1e1SFrank Piva if (t && t->get_dirty(-1))
470*94c4a1e1SFrank Piva return true;
471*94c4a1e1SFrank Piva }
472*94c4a1e1SFrank Piva
473*94c4a1e1SFrank Piva return false;
474*94c4a1e1SFrank Piva }
475*94c4a1e1SFrank Piva
Qcow2L1Table(Qcow2State & qs)476*94c4a1e1SFrank Piva Qcow2L1Table::Qcow2L1Table(Qcow2State &qs): Qcow2TopTable(qs,
477*94c4a1e1SFrank Piva qs.get_l1_table_offset(), qs.get_l1_table_max_size(),
478*94c4a1e1SFrank Piva typeid(*this).name(), QCOW2_META_TOP | QCOW2_META_MAPPING)
479*94c4a1e1SFrank Piva {
480*94c4a1e1SFrank Piva }
481*94c4a1e1SFrank Piva
load(Qcow2State & qs,const qcow2_io_ctx_t & ioc,u32 len,bool sync)482*94c4a1e1SFrank Piva int Qcow2L1Table::load(Qcow2State &qs, const qcow2_io_ctx_t &ioc, u32 len, bool sync)
483*94c4a1e1SFrank Piva {
484*94c4a1e1SFrank Piva int ret;
485*94c4a1e1SFrank Piva
486*94c4a1e1SFrank Piva ret = Qcow2Meta::load(qs, ioc, len, sync);
487*94c4a1e1SFrank Piva if (ret < 0)
488*94c4a1e1SFrank Piva ublk_err( "%s %s: load failed %d", __func__,
489*94c4a1e1SFrank Piva typeid(*this).name(), ret);
490*94c4a1e1SFrank Piva return ret;
491*94c4a1e1SFrank Piva }
492*94c4a1e1SFrank Piva
dump()493*94c4a1e1SFrank Piva void Qcow2L1Table::dump()
494*94c4a1e1SFrank Piva {
495*94c4a1e1SFrank Piva qcow2_log("%s %s: sizeof %zd\n", __func__, typeid(*this).name(),
496*94c4a1e1SFrank Piva sizeof(*this));
497*94c4a1e1SFrank Piva for (int i = 0; i < header.get_l1_size(); i++)
498*94c4a1e1SFrank Piva qcow2_log("%d: %lx\n", i, get_entry(i));
499*94c4a1e1SFrank Piva }
500*94c4a1e1SFrank Piva
get_entry(u32 idx)501*94c4a1e1SFrank Piva u64 Qcow2L1Table::get_entry(u32 idx) {
502*94c4a1e1SFrank Piva return get_entry_fast(idx);
503*94c4a1e1SFrank Piva }
504*94c4a1e1SFrank Piva
set_entry(u32 idx,u64 val)505*94c4a1e1SFrank Piva void Qcow2L1Table::set_entry(u32 idx, u64 val) {
506*94c4a1e1SFrank Piva set_entry_fast(idx, val);
507*94c4a1e1SFrank Piva }
508*94c4a1e1SFrank Piva
Qcow2RefcountTable(Qcow2State & qs)509*94c4a1e1SFrank Piva Qcow2RefcountTable::Qcow2RefcountTable(Qcow2State &qs):
510*94c4a1e1SFrank Piva Qcow2TopTable(qs, qs.get_refcount_table_offset(),
511*94c4a1e1SFrank Piva qs.get_refcount_table_max_size(),
512*94c4a1e1SFrank Piva typeid(*this).name(), QCOW2_META_TOP)
513*94c4a1e1SFrank Piva {
514*94c4a1e1SFrank Piva }
515*94c4a1e1SFrank Piva
load(Qcow2State & qs,const qcow2_io_ctx_t & ioc,u32 len,bool sync)516*94c4a1e1SFrank Piva int Qcow2RefcountTable::load(Qcow2State &qs, const qcow2_io_ctx_t &ioc,
517*94c4a1e1SFrank Piva u32 len, bool sync)
518*94c4a1e1SFrank Piva {
519*94c4a1e1SFrank Piva int ret;
520*94c4a1e1SFrank Piva
521*94c4a1e1SFrank Piva ret = Qcow2Meta::load(qs, ioc, len, sync);
522*94c4a1e1SFrank Piva if (ret < 0)
523*94c4a1e1SFrank Piva ublk_err( "%s %s: load failed %d", __func__,
524*94c4a1e1SFrank Piva typeid(*this).name(), ret);
525*94c4a1e1SFrank Piva return ret;
526*94c4a1e1SFrank Piva }
527*94c4a1e1SFrank Piva
get_entry(u32 idx)528*94c4a1e1SFrank Piva u64 Qcow2RefcountTable::get_entry(u32 idx) {
529*94c4a1e1SFrank Piva return get_entry_fast(idx);
530*94c4a1e1SFrank Piva }
531*94c4a1e1SFrank Piva
set_entry(u32 idx,u64 val)532*94c4a1e1SFrank Piva void Qcow2RefcountTable::set_entry(u32 idx, u64 val) {
533*94c4a1e1SFrank Piva set_entry_fast(idx, val);
534*94c4a1e1SFrank Piva }
535*94c4a1e1SFrank Piva
dump()536*94c4a1e1SFrank Piva void Qcow2RefcountTable::dump()
537*94c4a1e1SFrank Piva {
538*94c4a1e1SFrank Piva qcow2_log("%s %s: sizeof %zd\n", __func__, typeid(*this).name(),
539*94c4a1e1SFrank Piva sizeof(*this));
540*94c4a1e1SFrank Piva for (int i = 0; i < data_len / 8; i++) {
541*94c4a1e1SFrank Piva u64 entry = get_entry(i);
542*94c4a1e1SFrank Piva
543*94c4a1e1SFrank Piva if (entry != 0)
544*94c4a1e1SFrank Piva qcow2_log("%d: %lx\n", i, entry);
545*94c4a1e1SFrank Piva }
546*94c4a1e1SFrank Piva }
547*94c4a1e1SFrank Piva
Qcow2SliceMeta(Qcow2State & qs,u64 off,u32 buf_sz,const char * cls_name,u32 p_idx,u32 f)548*94c4a1e1SFrank Piva Qcow2SliceMeta::Qcow2SliceMeta(Qcow2State &qs, u64 off, u32 buf_sz,
549*94c4a1e1SFrank Piva const char *cls_name, u32 p_idx, u32 f):
550*94c4a1e1SFrank Piva Qcow2MappingMeta(qs, off, buf_sz, cls_name, f),
551*94c4a1e1SFrank Piva parent_idx(p_idx)
552*94c4a1e1SFrank Piva {
553*94c4a1e1SFrank Piva #ifdef QCOW2_CACHE_DEBUG
554*94c4a1e1SFrank Piva qcow2_log("slice meta %llx/%p/%d allocated\n", off, addr, buf_sz);
555*94c4a1e1SFrank Piva #endif
556*94c4a1e1SFrank Piva #ifdef DEBUG_QCOW2_META_VALIDATE
557*94c4a1e1SFrank Piva if (posix_memalign((void **)&validate_addr, getpagesize(), buf_sz))
558*94c4a1e1SFrank Piva ublk_err( "%s: allocate validate memory %d bytes failed\n",
559*94c4a1e1SFrank Piva __func__, buf_sz);
560*94c4a1e1SFrank Piva #endif
561*94c4a1e1SFrank Piva }
562*94c4a1e1SFrank Piva
~Qcow2SliceMeta()563*94c4a1e1SFrank Piva Qcow2SliceMeta::~Qcow2SliceMeta() {
564*94c4a1e1SFrank Piva #ifdef DEBUG_QCOW2_META_VALIDATE
565*94c4a1e1SFrank Piva free(validate_addr);
566*94c4a1e1SFrank Piva #endif
567*94c4a1e1SFrank Piva }
568*94c4a1e1SFrank Piva
prep_flush(const qcow2_io_ctx_t & ioc)569*94c4a1e1SFrank Piva bool Qcow2SliceMeta::prep_flush(const qcow2_io_ctx_t &ioc)
570*94c4a1e1SFrank Piva {
571*94c4a1e1SFrank Piva if (!(flags & QCOW2_META_DIRTY))
572*94c4a1e1SFrank Piva return false;
573*94c4a1e1SFrank Piva
574*94c4a1e1SFrank Piva if (flags & QCOW2_META_FLUSHING) {
575*94c4a1e1SFrank Piva add_waiter(ioc.get_tag());
576*94c4a1e1SFrank Piva throw MetaUpdateException();
577*94c4a1e1SFrank Piva }
578*94c4a1e1SFrank Piva flags |= QCOW2_META_FLUSHING;
579*94c4a1e1SFrank Piva return true;
580*94c4a1e1SFrank Piva }
581*94c4a1e1SFrank Piva
unprep_flush()582*94c4a1e1SFrank Piva void Qcow2SliceMeta::unprep_flush() {
583*94c4a1e1SFrank Piva flags &= ~QCOW2_META_FLUSHING;
584*94c4a1e1SFrank Piva }
585*94c4a1e1SFrank Piva
zero_my_cluster(Qcow2State & qs,const qcow2_io_ctx_t & ioc)586*94c4a1e1SFrank Piva int Qcow2SliceMeta::zero_my_cluster(Qcow2State &qs,
587*94c4a1e1SFrank Piva const qcow2_io_ctx_t &ioc)
588*94c4a1e1SFrank Piva {
589*94c4a1e1SFrank Piva u64 cluster_off = offset & ~((1ULL << qs.header.cluster_bits) - 1);
590*94c4a1e1SFrank Piva Qcow2ClusterState *s = qs.cluster_allocator.get_cluster_state(
591*94c4a1e1SFrank Piva cluster_off);
592*94c4a1e1SFrank Piva u32 qid = ioc.get_qid();
593*94c4a1e1SFrank Piva u32 tag = ioc.get_tag();
594*94c4a1e1SFrank Piva const struct ublksrv_queue *q = ublksrv_get_queue(qs.dev, qid);
595*94c4a1e1SFrank Piva int fd = q->dev->tgt.fds[1];
596*94c4a1e1SFrank Piva struct io_uring_sqe *sqe;
597*94c4a1e1SFrank Piva int mode = FALLOC_FL_ZERO_RANGE;
598*94c4a1e1SFrank Piva unsigned mio_id;
599*94c4a1e1SFrank Piva
600*94c4a1e1SFrank Piva if (s == nullptr)
601*94c4a1e1SFrank Piva return 0;
602*94c4a1e1SFrank Piva
603*94c4a1e1SFrank Piva if (s->get_state() >= QCOW2_ALLOC_ZEROED)
604*94c4a1e1SFrank Piva return 0;
605*94c4a1e1SFrank Piva
606*94c4a1e1SFrank Piva if (s->get_state() == QCOW2_ALLOC_ZEROING) {
607*94c4a1e1SFrank Piva s->add_waiter(ioc.get_tag());
608*94c4a1e1SFrank Piva throw MetaUpdateException();
609*94c4a1e1SFrank Piva }
610*94c4a1e1SFrank Piva
611*94c4a1e1SFrank Piva sqe = io_uring_get_sqe(q->ring_ptr);
612*94c4a1e1SFrank Piva if (!sqe) {
613*94c4a1e1SFrank Piva ublk_err("%s: tag %d offset %" PRIu64 "op %d, no sqe for zeroing\n",
614*94c4a1e1SFrank Piva __func__, tag, offset, IORING_OP_FALLOCATE);
615*94c4a1e1SFrank Piva return -ENOMEM;
616*94c4a1e1SFrank Piva }
617*94c4a1e1SFrank Piva
618*94c4a1e1SFrank Piva get_ref();
619*94c4a1e1SFrank Piva
620*94c4a1e1SFrank Piva mio_id = qs.add_meta_io(qid, this);
621*94c4a1e1SFrank Piva s->set_state(QCOW2_ALLOC_ZEROING);
622*94c4a1e1SFrank Piva io_uring_prep_fallocate(sqe, fd, mode, cluster_off,
623*94c4a1e1SFrank Piva (1ULL << qs.header.cluster_bits));
624*94c4a1e1SFrank Piva sqe->user_data = build_user_data(tag,
625*94c4a1e1SFrank Piva IORING_OP_FALLOCATE, mio_id + 1, 1);
626*94c4a1e1SFrank Piva ublk_dbg(UBLK_DBG_QCOW2_META, "%s %s: zeroing %p tag %d off %lx sz %d flags %x ref %d\n",
627*94c4a1e1SFrank Piva __func__, typeid(*this).name(), this, tag, cluster_off,
628*94c4a1e1SFrank Piva (1ULL << qs.header.cluster_bits), flags, refcnt);
629*94c4a1e1SFrank Piva return 1;
630*94c4a1e1SFrank Piva }
631*94c4a1e1SFrank Piva
load(Qcow2State & qs,const qcow2_io_ctx_t & ioc,u32 len,bool sync)632*94c4a1e1SFrank Piva int Qcow2SliceMeta::load(Qcow2State &qs, const qcow2_io_ctx_t &ioc,
633*94c4a1e1SFrank Piva u32 len, bool sync)
634*94c4a1e1SFrank Piva {
635*94c4a1e1SFrank Piva int ret = -EINVAL;
636*94c4a1e1SFrank Piva u32 qid = ioc.get_qid();
637*94c4a1e1SFrank Piva u32 tag = ioc.get_tag();
638*94c4a1e1SFrank Piva const struct ublksrv_queue *q = ublksrv_get_queue(qs.dev, qid);
639*94c4a1e1SFrank Piva struct io_uring_sqe *sqe;
640*94c4a1e1SFrank Piva int mio_id;
641*94c4a1e1SFrank Piva
642*94c4a1e1SFrank Piva if (sync) {
643*94c4a1e1SFrank Piva ublk_err( "%s %s: we only support async load",
644*94c4a1e1SFrank Piva __func__, typeid(*this).name());
645*94c4a1e1SFrank Piva return -EINVAL;
646*94c4a1e1SFrank Piva }
647*94c4a1e1SFrank Piva
648*94c4a1e1SFrank Piva if (flags & QCOW2_META_UPDATE) {
649*94c4a1e1SFrank Piva ublk_err( "%s %s: we are update, need to load?",
650*94c4a1e1SFrank Piva __func__, typeid(*this).name());
651*94c4a1e1SFrank Piva return -EINVAL;
652*94c4a1e1SFrank Piva }
653*94c4a1e1SFrank Piva
654*94c4a1e1SFrank Piva sqe = io_uring_get_sqe(q->ring_ptr);
655*94c4a1e1SFrank Piva if (!sqe) {
656*94c4a1e1SFrank Piva ublk_err( "%s %s: not get sqe allocated",
657*94c4a1e1SFrank Piva __func__, typeid(*this).name());
658*94c4a1e1SFrank Piva return ret;
659*94c4a1e1SFrank Piva }
660*94c4a1e1SFrank Piva
661*94c4a1e1SFrank Piva get_ref();
662*94c4a1e1SFrank Piva
663*94c4a1e1SFrank Piva mio_id = qs.add_meta_io(qid, this);
664*94c4a1e1SFrank Piva
665*94c4a1e1SFrank Piva io_uring_prep_read(sqe, 1, (void *)addr, buf_sz, offset);
666*94c4a1e1SFrank Piva sqe->flags = IOSQE_FIXED_FILE;
667*94c4a1e1SFrank Piva /* meta io id starts from one and zero is reserved for plain ublk io */
668*94c4a1e1SFrank Piva sqe->user_data = build_user_data(tag, IORING_OP_READ, mio_id + 1, 1);
669*94c4a1e1SFrank Piva
670*94c4a1e1SFrank Piva ublk_dbg(UBLK_DBG_QCOW2_META, "%s: queue io op %d(%llx %x %llx)"
671*94c4a1e1SFrank Piva " (qid %d tag %u, cmd_op %u target: %d tgt_data %d)\n",
672*94c4a1e1SFrank Piva __func__, sqe->opcode, sqe->off, sqe->len, sqe->addr,
673*94c4a1e1SFrank Piva q->q_id, tag, sqe->opcode, 1, mio_id + 1);
674*94c4a1e1SFrank Piva ublk_dbg(UBLK_DBG_QCOW2_META, "%s %s: loading %p tag %d off %lx sz %d flags %x ref %d\n",
675*94c4a1e1SFrank Piva __func__, typeid(*this).name(), this, tag,
676*94c4a1e1SFrank Piva offset, buf_sz, flags, refcnt);
677*94c4a1e1SFrank Piva
678*94c4a1e1SFrank Piva return 0;
679*94c4a1e1SFrank Piva }
680*94c4a1e1SFrank Piva
681*94c4a1e1SFrank Piva #ifdef DEBUG_QCOW2_META_VALIDATE
io_done_validate(Qcow2State & qs,const struct ublksrv_queue * q,struct io_uring_cqe * cqe)682*94c4a1e1SFrank Piva void Qcow2SliceMeta::io_done_validate(Qcow2State &qs, const struct ublksrv_queue *q,
683*94c4a1e1SFrank Piva struct io_uring_cqe *cqe)
684*94c4a1e1SFrank Piva {
685*94c4a1e1SFrank Piva u32 tag = user_data_to_tag(cqe->user_data);
686*94c4a1e1SFrank Piva u32 meta_id = user_data_to_tgt_data(cqe->user_data) - 1;
687*94c4a1e1SFrank Piva u32 op = user_data_to_op(cqe->user_data);
688*94c4a1e1SFrank Piva u64 cluster_off = offset & ~((1ULL << qs.header.cluster_bits) - 1);
689*94c4a1e1SFrank Piva bool res;
690*94c4a1e1SFrank Piva
691*94c4a1e1SFrank Piva //for write, buffer data has been saved to validate_addr before
692*94c4a1e1SFrank Piva //submitting the WRITE io
693*94c4a1e1SFrank Piva if (op != IORING_OP_WRITE) {
694*94c4a1e1SFrank Piva lseek(qs.img.fd, offset, SEEK_SET);
695*94c4a1e1SFrank Piva read(qs.img.fd, validate_addr, buf_sz);
696*94c4a1e1SFrank Piva }
697*94c4a1e1SFrank Piva
698*94c4a1e1SFrank Piva if (op == IORING_OP_FALLOCATE) {
699*94c4a1e1SFrank Piva for (int i = 0; i < buf_sz; i++) {
700*94c4a1e1SFrank Piva char *buf = (char *)validate_addr;
701*94c4a1e1SFrank Piva
702*94c4a1e1SFrank Piva qcow2_assert(buf[i] == 0);
703*94c4a1e1SFrank Piva }
704*94c4a1e1SFrank Piva } else if (op == IORING_OP_WRITE || op == IORING_OP_READ) {
705*94c4a1e1SFrank Piva unsigned long *buf = (unsigned long *)addr;
706*94c4a1e1SFrank Piva unsigned long *buf2 = (unsigned long *)validate_addr;
707*94c4a1e1SFrank Piva
708*94c4a1e1SFrank Piva res = bcmp(addr, validate_addr, buf_sz);
709*94c4a1e1SFrank Piva
710*94c4a1e1SFrank Piva if (res == 0)
711*94c4a1e1SFrank Piva return;
712*94c4a1e1SFrank Piva
713*94c4a1e1SFrank Piva for (int i = 0; i < buf_sz / 8; i++) {
714*94c4a1e1SFrank Piva if (buf[i] != buf2[i]) {
715*94c4a1e1SFrank Piva qcow2_log("%s: not same in %d %lx %lx\n",
716*94c4a1e1SFrank Piva __func__, i, buf[i], buf2[i]);
717*94c4a1e1SFrank Piva qcow2_log("%s: tag %d, tgt_data %d op %d meta (%p %x %lx %d) res %d\n",
718*94c4a1e1SFrank Piva __func__, tag, meta_id, op, this,
719*94c4a1e1SFrank Piva get_flags(), get_offset(),
720*94c4a1e1SFrank Piva refcnt, cqe->res);
721*94c4a1e1SFrank Piva }
722*94c4a1e1SFrank Piva }
723*94c4a1e1SFrank Piva qcow2_assert(0);
724*94c4a1e1SFrank Piva }
725*94c4a1e1SFrank Piva }
726*94c4a1e1SFrank Piva #endif
727*94c4a1e1SFrank Piva
728*94c4a1e1SFrank Piva /* called for both load() and flush() */
io_done(Qcow2State & qs,const struct ublksrv_queue * q,const struct io_uring_cqe * cqe)729*94c4a1e1SFrank Piva void Qcow2SliceMeta::io_done(Qcow2State &qs, const struct ublksrv_queue *q,
730*94c4a1e1SFrank Piva const struct io_uring_cqe *cqe)
731*94c4a1e1SFrank Piva {
732*94c4a1e1SFrank Piva u32 tag = user_data_to_tag(cqe->user_data);
733*94c4a1e1SFrank Piva u32 meta_id = user_data_to_tgt_data(cqe->user_data) - 1;
734*94c4a1e1SFrank Piva u32 op = user_data_to_op(cqe->user_data);
735*94c4a1e1SFrank Piva u64 cluster_off = offset & ~((1ULL << qs.header.cluster_bits) - 1);
736*94c4a1e1SFrank Piva
737*94c4a1e1SFrank Piva if (cqe->res < 0) {
738*94c4a1e1SFrank Piva qcow2_log("%s: failure: tag %d, tgt_data %d op %d meta (%p %x %lx %d) res %d\n",
739*94c4a1e1SFrank Piva __func__, tag, meta_id, op, this,
740*94c4a1e1SFrank Piva get_flags(), get_offset(), refcnt, cqe->res);
741*94c4a1e1SFrank Piva //zeroing the cluster for holding me is done
742*94c4a1e1SFrank Piva if (op == IORING_OP_FALLOCATE) {
743*94c4a1e1SFrank Piva if (qs.cluster_allocator.
744*94c4a1e1SFrank Piva alloc_cluster_reset(cluster_off))
745*94c4a1e1SFrank Piva goto exit;
746*94c4a1e1SFrank Piva } else if (op == IORING_OP_WRITE) {
747*94c4a1e1SFrank Piva unprep_flush();
748*94c4a1e1SFrank Piva goto exit;
749*94c4a1e1SFrank Piva } else
750*94c4a1e1SFrank Piva goto exit;
751*94c4a1e1SFrank Piva }
752*94c4a1e1SFrank Piva
753*94c4a1e1SFrank Piva io_done_validate(qs, q, cqe);
754*94c4a1e1SFrank Piva
755*94c4a1e1SFrank Piva if (op == IORING_OP_READ)
756*94c4a1e1SFrank Piva set_update(true);
757*94c4a1e1SFrank Piva else if (op == IORING_OP_WRITE) {
758*94c4a1e1SFrank Piva unprep_flush();
759*94c4a1e1SFrank Piva qs.meta_flushing.dec_dirtied_slice(is_mapping_meta());
760*94c4a1e1SFrank Piva set_dirty(-1, false);
761*94c4a1e1SFrank Piva set_prep_flush(false);
762*94c4a1e1SFrank Piva } else if (op == IORING_OP_FALLOCATE)
763*94c4a1e1SFrank Piva qs.cluster_allocator.alloc_cluster_zeroed(q, tag, cluster_off);
764*94c4a1e1SFrank Piva else
765*94c4a1e1SFrank Piva ublk_err( "%s: unknown op: tag %d op %d meta_id %d res %d\n",
766*94c4a1e1SFrank Piva __func__, tag, op, meta_id, cqe->res);
767*94c4a1e1SFrank Piva
768*94c4a1e1SFrank Piva ublk_dbg(UBLK_DBG_QCOW2_META, "%s: tag %d, tgt_data %d op %d meta (%p %x %lx %d) res %d\n",
769*94c4a1e1SFrank Piva __func__, tag, meta_id, op, this,
770*94c4a1e1SFrank Piva get_flags(), get_offset(), refcnt, cqe->res);
771*94c4a1e1SFrank Piva
772*94c4a1e1SFrank Piva //wake up waiters
773*94c4a1e1SFrank Piva Qcow2MappingMeta::io_done(qs, q, cqe);
774*94c4a1e1SFrank Piva
775*94c4a1e1SFrank Piva //if it is evicted, now it is ready to free it
776*94c4a1e1SFrank Piva if ((op == IORING_OP_WRITE) && cqe->res >= 0 && get_evicted())
777*94c4a1e1SFrank Piva qs.add_slice_to_free_list(this);
778*94c4a1e1SFrank Piva
779*94c4a1e1SFrank Piva exit:
780*94c4a1e1SFrank Piva //drop the reference grabbed in either load() or flush()
781*94c4a1e1SFrank Piva put_ref();
782*94c4a1e1SFrank Piva return;
783*94c4a1e1SFrank Piva }
784*94c4a1e1SFrank Piva
wait_clusters(Qcow2State & qs,const qcow2_io_ctx_t & ioc)785*94c4a1e1SFrank Piva void Qcow2SliceMeta::wait_clusters(Qcow2State &qs,
786*94c4a1e1SFrank Piva const qcow2_io_ctx_t &ioc)
787*94c4a1e1SFrank Piva {
788*94c4a1e1SFrank Piva for (int i = 0; i < get_nr_entries(); i++) {
789*94c4a1e1SFrank Piva u64 entry = get_entry(i);
790*94c4a1e1SFrank Piva
791*94c4a1e1SFrank Piva if (entry) {
792*94c4a1e1SFrank Piva u64 cluster_off;
793*94c4a1e1SFrank Piva
794*94c4a1e1SFrank Piva //mapping meta means this is one l2 table, otherwise
795*94c4a1e1SFrank Piva //it is one refcount block table
796*94c4a1e1SFrank Piva if (is_mapping_meta())
797*94c4a1e1SFrank Piva cluster_off = entry & L1E_OFFSET_MASK;
798*94c4a1e1SFrank Piva else
799*94c4a1e1SFrank Piva cluster_off = virt_offset() + (u64)i << qs.header.cluster_bits;
800*94c4a1e1SFrank Piva
801*94c4a1e1SFrank Piva Qcow2ClusterState *s = qs.cluster_allocator.
802*94c4a1e1SFrank Piva get_cluster_state(cluster_off);
803*94c4a1e1SFrank Piva
804*94c4a1e1SFrank Piva if (s == nullptr)
805*94c4a1e1SFrank Piva continue;
806*94c4a1e1SFrank Piva
807*94c4a1e1SFrank Piva if (s->get_state() < QCOW2_ALLOC_ZEROED) {
808*94c4a1e1SFrank Piva s->add_waiter(ioc.get_tag());
809*94c4a1e1SFrank Piva throw MetaUpdateException();
810*94c4a1e1SFrank Piva }
811*94c4a1e1SFrank Piva }
812*94c4a1e1SFrank Piva }
813*94c4a1e1SFrank Piva }
814*94c4a1e1SFrank Piva
reclaim_me()815*94c4a1e1SFrank Piva void Qcow2SliceMeta::reclaim_me()
816*94c4a1e1SFrank Piva {
817*94c4a1e1SFrank Piva unsigned queues = header.qs.dev_info->nr_hw_queues;
818*94c4a1e1SFrank Piva
819*94c4a1e1SFrank Piva ublk_dbg(UBLK_DBG_QCOW2_META, "%s: %p off %llx flags %x\n", __func__,
820*94c4a1e1SFrank Piva this, get_offset(), flags);
821*94c4a1e1SFrank Piva
822*94c4a1e1SFrank Piva header.qs.remove_slice_from_evicted_list(this);
823*94c4a1e1SFrank Piva
824*94c4a1e1SFrank Piva ublk_dbg(UBLK_DBG_QCOW2_META, "%s: %p off %llx\n", __func__, this, get_offset());
825*94c4a1e1SFrank Piva
826*94c4a1e1SFrank Piva //Tell the whole world, I am leaving
827*94c4a1e1SFrank Piva for (int i = 0; i < queues; i++) {
828*94c4a1e1SFrank Piva const struct ublksrv_queue *q = ublksrv_get_queue(header.qs.dev, i);
829*94c4a1e1SFrank Piva
830*94c4a1e1SFrank Piva wakeup_all(q, -1);
831*94c4a1e1SFrank Piva }
832*94c4a1e1SFrank Piva header.qs.reclaim_slice(this);
833*94c4a1e1SFrank Piva }
834*94c4a1e1SFrank Piva
Qcow2RefcountBlock(Qcow2State & qs,u64 off,u32 p_idx,u32 f)835*94c4a1e1SFrank Piva Qcow2RefcountBlock::Qcow2RefcountBlock(Qcow2State &qs, u64 off, u32 p_idx, u32 f):
836*94c4a1e1SFrank Piva Qcow2SliceMeta(qs, off, QCOW2_PARA::REFCOUNT_BLK_SLICE_BYTES,
837*94c4a1e1SFrank Piva typeid(*this).name(), p_idx, f),
838*94c4a1e1SFrank Piva dirty_start_idx((unsigned)-1)
839*94c4a1e1SFrank Piva {
840*94c4a1e1SFrank Piva entry_bits_order = qs.header.refcount_order;
841*94c4a1e1SFrank Piva ublk_dbg(UBLK_DBG_QCOW2_META_RB, "rb meta %p %llx -> %llx \n", this, virt_offset(), off);
842*94c4a1e1SFrank Piva }
843*94c4a1e1SFrank Piva
844*94c4a1e1SFrank Piva
reset(Qcow2State & qs,u64 off,u32 p_idx,u32 f)845*94c4a1e1SFrank Piva void Qcow2RefcountBlock::reset(Qcow2State &qs, u64 off, u32 p_idx, u32 f)
846*94c4a1e1SFrank Piva {
847*94c4a1e1SFrank Piva Qcow2RefcountBlock tmp(qs, off, p_idx, f | QCOW2_META_DONT_ALLOC_BUF);
848*94c4a1e1SFrank Piva
849*94c4a1e1SFrank Piva qcow2_assert(refcnt == 0);
850*94c4a1e1SFrank Piva
851*94c4a1e1SFrank Piva offset = tmp.get_offset();
852*94c4a1e1SFrank Piva flags = tmp.get_flags() & ~QCOW2_META_DONT_ALLOC_BUF;
853*94c4a1e1SFrank Piva refcnt = tmp.read_ref();
854*94c4a1e1SFrank Piva
855*94c4a1e1SFrank Piva ublk_dbg(UBLK_DBG_QCOW2_META_RB, "%s: %p refcnt %d flags %x offset %lx \n",
856*94c4a1e1SFrank Piva __func__, this, refcnt, flags, offset);
857*94c4a1e1SFrank Piva
858*94c4a1e1SFrank Piva next_free_idx = tmp.get_next_free_idx();
859*94c4a1e1SFrank Piva
860*94c4a1e1SFrank Piva parent_idx = tmp.parent_idx;
861*94c4a1e1SFrank Piva
862*94c4a1e1SFrank Piva dirty_start_idx = tmp.dirty_start_idx;
863*94c4a1e1SFrank Piva }
864*94c4a1e1SFrank Piva
get_entry(u32 idx)865*94c4a1e1SFrank Piva u64 Qcow2RefcountBlock::get_entry(u32 idx) {
866*94c4a1e1SFrank Piva return get_entry_fast(idx);
867*94c4a1e1SFrank Piva }
868*94c4a1e1SFrank Piva
set_entry(u32 idx,u64 val)869*94c4a1e1SFrank Piva void Qcow2RefcountBlock::set_entry(u32 idx, u64 val) {
870*94c4a1e1SFrank Piva set_entry_fast(idx, val);
871*94c4a1e1SFrank Piva
872*94c4a1e1SFrank Piva if (is_flushing() || !get_update()) {
873*94c4a1e1SFrank Piva qcow2_log("BUG %s: obj %p flags %x off %lx\n",
874*94c4a1e1SFrank Piva __func__, this, flags, offset);
875*94c4a1e1SFrank Piva qcow2_assert(0);
876*94c4a1e1SFrank Piva }
877*94c4a1e1SFrank Piva }
878*94c4a1e1SFrank Piva
flush(Qcow2State & qs,const qcow2_io_ctx_t & ioc,u64 off,u32 len)879*94c4a1e1SFrank Piva int Qcow2RefcountBlock::flush(Qcow2State &qs, const qcow2_io_ctx_t &ioc,
880*94c4a1e1SFrank Piva u64 off, u32 len)
881*94c4a1e1SFrank Piva {
882*94c4a1e1SFrank Piva int ret;
883*94c4a1e1SFrank Piva
884*94c4a1e1SFrank Piva //wait_clusters(qs, ioc);
885*94c4a1e1SFrank Piva
886*94c4a1e1SFrank Piva if (!prep_flush(ioc))
887*94c4a1e1SFrank Piva return 0;
888*94c4a1e1SFrank Piva
889*94c4a1e1SFrank Piva //flush can't be started unless the above two are done
890*94c4a1e1SFrank Piva //
891*94c4a1e1SFrank Piva //the ref is released in io_done()
892*94c4a1e1SFrank Piva get_ref();
893*94c4a1e1SFrank Piva #ifdef DEBUG_QCOW2_META_VALIDATE
894*94c4a1e1SFrank Piva memcpy(validate_addr, addr, buf_sz);
895*94c4a1e1SFrank Piva #endif
896*94c4a1e1SFrank Piva ret = Qcow2MappingMeta::__flush(qs, ioc, off, len);
897*94c4a1e1SFrank Piva if (ret <= 0) {
898*94c4a1e1SFrank Piva unprep_flush();
899*94c4a1e1SFrank Piva put_ref();
900*94c4a1e1SFrank Piva }
901*94c4a1e1SFrank Piva return ret;
902*94c4a1e1SFrank Piva }
903*94c4a1e1SFrank Piva
~Qcow2RefcountBlock()904*94c4a1e1SFrank Piva Qcow2RefcountBlock::~Qcow2RefcountBlock()
905*94c4a1e1SFrank Piva {
906*94c4a1e1SFrank Piva }
907*94c4a1e1SFrank Piva
get_dirty_range(u64 * start,u64 * end)908*94c4a1e1SFrank Piva void Qcow2RefcountBlock::get_dirty_range(u64 *start, u64 *end)
909*94c4a1e1SFrank Piva {
910*94c4a1e1SFrank Piva *start = 1;
911*94c4a1e1SFrank Piva *end = 0;
912*94c4a1e1SFrank Piva }
913*94c4a1e1SFrank Piva
dump()914*94c4a1e1SFrank Piva void Qcow2RefcountBlock::dump()
915*94c4a1e1SFrank Piva {
916*94c4a1e1SFrank Piva unsigned cnt = 0;
917*94c4a1e1SFrank Piva int f = -1, l;
918*94c4a1e1SFrank Piva for (int i = 0; i < get_nr_entries(); i++) {
919*94c4a1e1SFrank Piva u64 entry = get_entry(i);
920*94c4a1e1SFrank Piva
921*94c4a1e1SFrank Piva if (entry != 0) {
922*94c4a1e1SFrank Piva if (f == -1)
923*94c4a1e1SFrank Piva f = i;
924*94c4a1e1SFrank Piva l = i;
925*94c4a1e1SFrank Piva cnt++; //qcow2_log("%d: %lx\n", i, entry);
926*94c4a1e1SFrank Piva }
927*94c4a1e1SFrank Piva }
928*94c4a1e1SFrank Piva
929*94c4a1e1SFrank Piva if (!cnt)
930*94c4a1e1SFrank Piva return;
931*94c4a1e1SFrank Piva
932*94c4a1e1SFrank Piva qcow2_log("%s %s: buf_sz %u offset %" PRIx64 " sizeof %zd entries %u parent_idx %u virt_off %" PRIx64 " flags %x\n",
933*94c4a1e1SFrank Piva __func__, typeid(*this).name(), buf_sz, offset, sizeof(*this),
934*94c4a1e1SFrank Piva cnt, parent_idx, virt_offset(),
935*94c4a1e1SFrank Piva flags);
936*94c4a1e1SFrank Piva qcow2_log("\t [%d] = %" PRIx64 "/%" PRIx64 " [%d] = %" PRIx64 "/%" PRIx64 "\n",
937*94c4a1e1SFrank Piva f, get_entry(f),
938*94c4a1e1SFrank Piva virt_offset() + (f << header.cluster_bits),
939*94c4a1e1SFrank Piva l, get_entry(l),
940*94c4a1e1SFrank Piva virt_offset() + (l << header.cluster_bits));
941*94c4a1e1SFrank Piva }
942*94c4a1e1SFrank Piva
Qcow2L2Table(Qcow2State & qs,u64 off,u32 p_idx,u32 f)943*94c4a1e1SFrank Piva Qcow2L2Table::Qcow2L2Table(Qcow2State &qs, u64 off, u32 p_idx, u32 f):
944*94c4a1e1SFrank Piva Qcow2SliceMeta(qs, off, QCOW2_PARA::L2_TABLE_SLICE_BYTES,
945*94c4a1e1SFrank Piva typeid(*this).name(), p_idx, f | QCOW2_META_MAPPING)
946*94c4a1e1SFrank Piva {
947*94c4a1e1SFrank Piva if (header.is_extended_l2_entries())
948*94c4a1e1SFrank Piva entry_bits_order <<= 1;
949*94c4a1e1SFrank Piva dirty_start = (u64)-1;
950*94c4a1e1SFrank Piva dirty_end = 0;
951*94c4a1e1SFrank Piva ublk_dbg(UBLK_DBG_QCOW2_META_L2, "l2 meta %p %llx -> %llx \n", this, virt_offset(), off);
952*94c4a1e1SFrank Piva }
953*94c4a1e1SFrank Piva
reset(Qcow2State & qs,u64 off,u32 p_idx,u32 f)954*94c4a1e1SFrank Piva void Qcow2L2Table::reset(Qcow2State &qs, u64 off, u32 p_idx, u32 f)
955*94c4a1e1SFrank Piva {
956*94c4a1e1SFrank Piva Qcow2L2Table tmp(qs, off, p_idx, f | QCOW2_META_DONT_ALLOC_BUF);
957*94c4a1e1SFrank Piva
958*94c4a1e1SFrank Piva qcow2_assert(refcnt == 0);
959*94c4a1e1SFrank Piva
960*94c4a1e1SFrank Piva offset = tmp.get_offset();
961*94c4a1e1SFrank Piva flags = tmp.get_flags() & ~QCOW2_META_DONT_ALLOC_BUF;
962*94c4a1e1SFrank Piva refcnt = tmp.read_ref();
963*94c4a1e1SFrank Piva
964*94c4a1e1SFrank Piva ublk_dbg(UBLK_DBG_QCOW2_META_L2, "%s: %p refcnt %d flags %x offset %lx \n",
965*94c4a1e1SFrank Piva __func__, this, refcnt, flags, offset);
966*94c4a1e1SFrank Piva
967*94c4a1e1SFrank Piva next_free_idx = tmp.get_next_free_idx();
968*94c4a1e1SFrank Piva
969*94c4a1e1SFrank Piva parent_idx = tmp.parent_idx;
970*94c4a1e1SFrank Piva
971*94c4a1e1SFrank Piva tmp.get_dirty_range(&dirty_start, &dirty_end);
972*94c4a1e1SFrank Piva }
973*94c4a1e1SFrank Piva
~Qcow2L2Table()974*94c4a1e1SFrank Piva Qcow2L2Table::~Qcow2L2Table()
975*94c4a1e1SFrank Piva {
976*94c4a1e1SFrank Piva }
977*94c4a1e1SFrank Piva
io_done(Qcow2State & qs,const struct ublksrv_queue * q,const struct io_uring_cqe * cqe)978*94c4a1e1SFrank Piva void Qcow2L2Table::io_done(Qcow2State &qs, const struct ublksrv_queue *q,
979*94c4a1e1SFrank Piva const struct io_uring_cqe *cqe)
980*94c4a1e1SFrank Piva {
981*94c4a1e1SFrank Piva get_ref();
982*94c4a1e1SFrank Piva Qcow2SliceMeta::io_done(qs, q, cqe);
983*94c4a1e1SFrank Piva check(qs, __func__, __LINE__);
984*94c4a1e1SFrank Piva put_ref();
985*94c4a1e1SFrank Piva }
986*94c4a1e1SFrank Piva
get_entry(u32 idx)987*94c4a1e1SFrank Piva u64 Qcow2L2Table::get_entry(u32 idx) {
988*94c4a1e1SFrank Piva return get_entry_fast(idx);
989*94c4a1e1SFrank Piva }
990*94c4a1e1SFrank Piva
get_dirty_range(u64 * start,u64 * end)991*94c4a1e1SFrank Piva void Qcow2L2Table::get_dirty_range(u64 *start, u64 *end)
992*94c4a1e1SFrank Piva {
993*94c4a1e1SFrank Piva *start = dirty_start;
994*94c4a1e1SFrank Piva *end = dirty_end;
995*94c4a1e1SFrank Piva }
996*94c4a1e1SFrank Piva
set_entry(u32 idx,u64 val)997*94c4a1e1SFrank Piva void Qcow2L2Table::set_entry(u32 idx, u64 val) {
998*94c4a1e1SFrank Piva set_entry_fast(idx, val);
999*94c4a1e1SFrank Piva
1000*94c4a1e1SFrank Piva if (is_flushing() || !get_update()) {
1001*94c4a1e1SFrank Piva qcow2_log("BUG %s: obj %p flags %x off %lx\n",
1002*94c4a1e1SFrank Piva __func__, this, flags, offset);
1003*94c4a1e1SFrank Piva qcow2_assert(0);
1004*94c4a1e1SFrank Piva }
1005*94c4a1e1SFrank Piva
1006*94c4a1e1SFrank Piva val &= L2E_OFFSET_MASK;
1007*94c4a1e1SFrank Piva
1008*94c4a1e1SFrank Piva qcow2_assert(!(val & ((1ULL << header.cluster_bits) - 1)));
1009*94c4a1e1SFrank Piva
1010*94c4a1e1SFrank Piva if (val < dirty_start)
1011*94c4a1e1SFrank Piva dirty_start = val;
1012*94c4a1e1SFrank Piva if (val > dirty_end)
1013*94c4a1e1SFrank Piva dirty_end = val;
1014*94c4a1e1SFrank Piva }
1015*94c4a1e1SFrank Piva
flush(Qcow2State & qs,const qcow2_io_ctx_t & ioc,u64 off,u32 len)1016*94c4a1e1SFrank Piva int Qcow2L2Table::flush(Qcow2State &qs, const qcow2_io_ctx_t &ioc,
1017*94c4a1e1SFrank Piva u64 off, u32 len)
1018*94c4a1e1SFrank Piva {
1019*94c4a1e1SFrank Piva int ret;
1020*94c4a1e1SFrank Piva
1021*94c4a1e1SFrank Piva wait_clusters(qs, ioc);
1022*94c4a1e1SFrank Piva
1023*94c4a1e1SFrank Piva if (!prep_flush(ioc))
1024*94c4a1e1SFrank Piva return 0;
1025*94c4a1e1SFrank Piva
1026*94c4a1e1SFrank Piva //flush can't be started unless the above two are done
1027*94c4a1e1SFrank Piva //
1028*94c4a1e1SFrank Piva //the ref is released in io_done()
1029*94c4a1e1SFrank Piva get_ref();
1030*94c4a1e1SFrank Piva #ifdef DEBUG_QCOW2_META_VALIDATE
1031*94c4a1e1SFrank Piva memcpy(validate_addr, addr, buf_sz);
1032*94c4a1e1SFrank Piva check_duplicated_clusters(qs, ioc.get_tag(), __func__, __LINE__);
1033*94c4a1e1SFrank Piva #endif
1034*94c4a1e1SFrank Piva ret = Qcow2MappingMeta::__flush(qs, ioc, off, len);
1035*94c4a1e1SFrank Piva if (ret <= 0) {
1036*94c4a1e1SFrank Piva unprep_flush();
1037*94c4a1e1SFrank Piva put_ref();
1038*94c4a1e1SFrank Piva }
1039*94c4a1e1SFrank Piva return ret;
1040*94c4a1e1SFrank Piva }
1041*94c4a1e1SFrank Piva
dump()1042*94c4a1e1SFrank Piva void Qcow2L2Table::dump()
1043*94c4a1e1SFrank Piva {
1044*94c4a1e1SFrank Piva unsigned cnt = 0;
1045*94c4a1e1SFrank Piva int f = -1, l;
1046*94c4a1e1SFrank Piva
1047*94c4a1e1SFrank Piva for (int i = 0; i < get_nr_entries(); i++) {
1048*94c4a1e1SFrank Piva u64 entry = get_entry(i);
1049*94c4a1e1SFrank Piva
1050*94c4a1e1SFrank Piva if (entry != 0) {
1051*94c4a1e1SFrank Piva if (f == -1)
1052*94c4a1e1SFrank Piva f = i;
1053*94c4a1e1SFrank Piva l = i;
1054*94c4a1e1SFrank Piva cnt++; //qcow2_log("%d: %lx\n", i, entry);
1055*94c4a1e1SFrank Piva }
1056*94c4a1e1SFrank Piva }
1057*94c4a1e1SFrank Piva
1058*94c4a1e1SFrank Piva if (!cnt)
1059*94c4a1e1SFrank Piva return;
1060*94c4a1e1SFrank Piva
1061*94c4a1e1SFrank Piva qcow2_log("%s %s: buf_sz %u offset %" PRIx64 " sizeof %zd entries %u parent_idx %u virt_off %" PRIx64 " flags %x\n",
1062*94c4a1e1SFrank Piva __func__, typeid(*this).name(), buf_sz, offset, sizeof(*this),
1063*94c4a1e1SFrank Piva cnt, parent_idx, virt_offset(), flags);
1064*94c4a1e1SFrank Piva qcow2_log("\t [%d] = %" PRIx64 "[%u] = %" PRIx64 "\n", f,
1065*94c4a1e1SFrank Piva get_entry(f), l, get_entry(l));
1066*94c4a1e1SFrank Piva }
1067*94c4a1e1SFrank Piva
1068*94c4a1e1SFrank Piva #ifdef DEBUG_QCOW2_META_VALIDATE
check(Qcow2State & qs,const char * func,int line)1069*94c4a1e1SFrank Piva void Qcow2L2Table::check(Qcow2State &qs, const char *func, int line)
1070*94c4a1e1SFrank Piva {
1071*94c4a1e1SFrank Piva int i, cnt = 0;
1072*94c4a1e1SFrank Piva bool bad = false;
1073*94c4a1e1SFrank Piva
1074*94c4a1e1SFrank Piva if (!get_update())
1075*94c4a1e1SFrank Piva return;
1076*94c4a1e1SFrank Piva
1077*94c4a1e1SFrank Piva //don't check evicted obj, which can't be used by anyone
1078*94c4a1e1SFrank Piva if (get_evicted())
1079*94c4a1e1SFrank Piva return;
1080*94c4a1e1SFrank Piva
1081*94c4a1e1SFrank Piva for (i = 0; i < get_nr_entries(); i++) {
1082*94c4a1e1SFrank Piva u64 entry = get_entry(i) & ((1ULL << 63) - 1);
1083*94c4a1e1SFrank Piva
1084*94c4a1e1SFrank Piva if (entry == 0)
1085*94c4a1e1SFrank Piva continue;
1086*94c4a1e1SFrank Piva
1087*94c4a1e1SFrank Piva cnt++;
1088*94c4a1e1SFrank Piva
1089*94c4a1e1SFrank Piva if (entry + (1ULL << qs.header.cluster_bits) >
1090*94c4a1e1SFrank Piva qs.cluster_allocator.max_physical_size) {
1091*94c4a1e1SFrank Piva qcow2_log("%s %d: entry %llx(parent idx %d, idx %d) offset %llx is too big\n",
1092*94c4a1e1SFrank Piva func, line, entry, parent_idx, i,
1093*94c4a1e1SFrank Piva get_offset());
1094*94c4a1e1SFrank Piva bad = true;
1095*94c4a1e1SFrank Piva }
1096*94c4a1e1SFrank Piva
1097*94c4a1e1SFrank Piva if (entry & ((1ULL << qs.header.cluster_bits) - 1)) {
1098*94c4a1e1SFrank Piva qcow2_log("%s: entry %llx(parent idx %d, idx %d) offset %llx isn't aligned\n",
1099*94c4a1e1SFrank Piva func, line, entry, parent_idx, i,
1100*94c4a1e1SFrank Piva get_offset());
1101*94c4a1e1SFrank Piva bad = true;
1102*94c4a1e1SFrank Piva }
1103*94c4a1e1SFrank Piva }
1104*94c4a1e1SFrank Piva
1105*94c4a1e1SFrank Piva if (bad) {
1106*94c4a1e1SFrank Piva qcow2_log("%s %s: %p buf_sz %u offset %llx sizeof %d parent_idx %u virt_off %llx flags %x refcnt %d\n",
1107*94c4a1e1SFrank Piva __func__, typeid(*this).name(), this, buf_sz, offset, sizeof(*this),
1108*94c4a1e1SFrank Piva parent_idx, virt_offset(), flags, read_ref());
1109*94c4a1e1SFrank Piva qcow2_log("\t total entries %d\n", cnt);
1110*94c4a1e1SFrank Piva assert(0);
1111*94c4a1e1SFrank Piva }
1112*94c4a1e1SFrank Piva }
1113*94c4a1e1SFrank Piva
check_duplicated_clusters(Qcow2State & qs,int tag,const char * func,int line)1114*94c4a1e1SFrank Piva void Qcow2L2Table::check_duplicated_clusters(Qcow2State &qs, int tag,
1115*94c4a1e1SFrank Piva const char *func, int line)
1116*94c4a1e1SFrank Piva {
1117*94c4a1e1SFrank Piva for (int i = 0; i < get_nr_entries(); i++) {
1118*94c4a1e1SFrank Piva u64 entry = get_entry(i);
1119*94c4a1e1SFrank Piva
1120*94c4a1e1SFrank Piva if (entry != 0) {
1121*94c4a1e1SFrank Piva u64 host_off = entry & ((1ULL << 63) - 1);
1122*94c4a1e1SFrank Piva u64 virt_off = virt_offset() + (((u64)i) <<
1123*94c4a1e1SFrank Piva qs.header.cluster_bits);
1124*94c4a1e1SFrank Piva
1125*94c4a1e1SFrank Piva if (qs.validate_cluster_map(host_off, virt_off))
1126*94c4a1e1SFrank Piva continue;
1127*94c4a1e1SFrank Piva qcow2_log("BUG %s %d: tag %d obj %p flags %x off %lx virt_off "
1128*94c4a1e1SFrank Piva "%lx(#%d) parent_idx %d\n",
1129*94c4a1e1SFrank Piva func, line, tag, this, flags, offset,
1130*94c4a1e1SFrank Piva virt_offset(), i, parent_idx);
1131*94c4a1e1SFrank Piva qcow2_assert(0);
1132*94c4a1e1SFrank Piva }
1133*94c4a1e1SFrank Piva }
1134*94c4a1e1SFrank Piva }
1135*94c4a1e1SFrank Piva #endif
1136