xref: /aosp_15_r20/external/ublksrv/qcow2/qcow2_meta.cpp (revision 94c4a1e103eb1715230460aab379dff275992c20)
1*94c4a1e1SFrank Piva // SPDX-License-Identifier: GPL-2.0
2*94c4a1e1SFrank Piva #include <cassert>
3*94c4a1e1SFrank Piva 
4*94c4a1e1SFrank Piva #include "qcow2.h"
5*94c4a1e1SFrank Piva #include "ublksrv_tgt.h"
6*94c4a1e1SFrank Piva 
7*94c4a1e1SFrank Piva 
8*94c4a1e1SFrank Piva // refcnt is for slice only, and initialize it as two, one is for submission
9*94c4a1e1SFrank Piva // side, another is for free side. This way guarantees that the returned slice
10*94c4a1e1SFrank Piva // from alloc_slice is always valid
Qcow2Meta(Qcow2Header & h,u64 off,u32 sz,const char * name,u32 f)11*94c4a1e1SFrank Piva Qcow2Meta::Qcow2Meta(Qcow2Header &h, u64 off, u32 sz, const char *name, u32 f):
12*94c4a1e1SFrank Piva 	header(h), offset(off), buf_sz(sz), flags(f), refcnt(2)
13*94c4a1e1SFrank Piva {
14*94c4a1e1SFrank Piva 	//used for implementing slice's ->reset() only
15*94c4a1e1SFrank Piva 	if (f & QCOW2_META_DONT_ALLOC_BUF)
16*94c4a1e1SFrank Piva 		return;
17*94c4a1e1SFrank Piva 
18*94c4a1e1SFrank Piva 	if (posix_memalign((void **)&addr, getpagesize(), sz))
19*94c4a1e1SFrank Piva 		ublk_err( "allocate memory %d bytes failed, %s\n",
20*94c4a1e1SFrank Piva 				sz, name);
21*94c4a1e1SFrank Piva #ifdef DEBUG_QCOW2_META_OBJ
22*94c4a1e1SFrank Piva 	id = name;
23*94c4a1e1SFrank Piva 	qcow2_log("%s: constructed, obj %p, buf size %d off %lx flags %x\n",
24*94c4a1e1SFrank Piva 			name, this, sz, off, flags);
25*94c4a1e1SFrank Piva #endif
26*94c4a1e1SFrank Piva }
27*94c4a1e1SFrank Piva 
show(const char * func,int line)28*94c4a1e1SFrank Piva void Qcow2Meta::show(const char *func, int line)
29*94c4a1e1SFrank Piva {
30*94c4a1e1SFrank Piva #ifdef DEBUG_QCOW2_META_OBJ
31*94c4a1e1SFrank Piva 	qcow2_log("%s:%d id %s obj %p flags %x off %lx ref %d\n",
32*94c4a1e1SFrank Piva 			func, line, id, this, flags, offset, refcnt);
33*94c4a1e1SFrank Piva #else
34*94c4a1e1SFrank Piva 	qcow2_log("%s:%d obj %p flags %x off %lx ref %d\n",
35*94c4a1e1SFrank Piva 			func, line, this, flags, offset, refcnt);
36*94c4a1e1SFrank Piva #endif
37*94c4a1e1SFrank Piva }
38*94c4a1e1SFrank Piva 
~Qcow2Meta()39*94c4a1e1SFrank Piva Qcow2Meta::~Qcow2Meta()
40*94c4a1e1SFrank Piva {
41*94c4a1e1SFrank Piva #ifdef DEBUG_QCOW2_META_OBJ
42*94c4a1e1SFrank Piva 	qcow2_log("%s: destructed, obj %p flags %x off %lx ref %d\n",
43*94c4a1e1SFrank Piva 			id, this, flags, offset, refcnt);
44*94c4a1e1SFrank Piva #endif
45*94c4a1e1SFrank Piva 	if (flags & QCOW2_META_DONT_ALLOC_BUF)
46*94c4a1e1SFrank Piva 		return;
47*94c4a1e1SFrank Piva 
48*94c4a1e1SFrank Piva 	if (!is_top_meta() && (get_dirty(-1) || is_flushing() ||
49*94c4a1e1SFrank Piva 				(!get_update() && !get_evicted()))) {
50*94c4a1e1SFrank Piva 		qcow2_log("BUG %s: obj %p flags %x off %lx\n",
51*94c4a1e1SFrank Piva 				__func__, this, flags, offset);
52*94c4a1e1SFrank Piva 		qcow2_assert(0);
53*94c4a1e1SFrank Piva 	}
54*94c4a1e1SFrank Piva 	free(addr);
55*94c4a1e1SFrank Piva }
56*94c4a1e1SFrank Piva 
load(Qcow2State & qs,const qcow2_io_ctx_t & ioc,u32 len,bool sync)57*94c4a1e1SFrank Piva int Qcow2Meta::load(Qcow2State &qs, const qcow2_io_ctx_t &ioc, u32 len, bool sync)
58*94c4a1e1SFrank Piva {
59*94c4a1e1SFrank Piva 	int fd;
60*94c4a1e1SFrank Piva 
61*94c4a1e1SFrank Piva 	if (addr == NULL)
62*94c4a1e1SFrank Piva 		return -EINVAL;
63*94c4a1e1SFrank Piva 	if (len > buf_sz) {
64*94c4a1e1SFrank Piva 		ublk_err( "%s %s: load too much %d(%d) \n",
65*94c4a1e1SFrank Piva 				__func__, typeid(*this).name(), len, buf_sz);
66*94c4a1e1SFrank Piva 		return -EINVAL;
67*94c4a1e1SFrank Piva 	}
68*94c4a1e1SFrank Piva 	if (!sync)
69*94c4a1e1SFrank Piva 		return -EOPNOTSUPP;
70*94c4a1e1SFrank Piva 
71*94c4a1e1SFrank Piva 	//qcow2_log("%s: read %s offset %llx len %lu  \n", __func__,
72*94c4a1e1SFrank Piva 	//		typeid(*this).name(), offset, len);
73*94c4a1e1SFrank Piva 	fd = qs.img.fd;
74*94c4a1e1SFrank Piva 	lseek(fd, offset, SEEK_SET);
75*94c4a1e1SFrank Piva 	data_len = read(fd, addr, len);
76*94c4a1e1SFrank Piva 	if (data_len != len)
77*94c4a1e1SFrank Piva 		qcow2_log("%s: read %u(%u)\n", __func__, len, data_len);
78*94c4a1e1SFrank Piva 	if (data_len > 0)
79*94c4a1e1SFrank Piva 		flags |= QCOW2_META_UPDATE;
80*94c4a1e1SFrank Piva 	return data_len;
81*94c4a1e1SFrank Piva }
82*94c4a1e1SFrank Piva 
flush(Qcow2State & qs,const qcow2_io_ctx_t & ioc,u64 off,u32 len)83*94c4a1e1SFrank Piva int Qcow2Meta::flush(Qcow2State &qs, const qcow2_io_ctx_t &ioc, u64 off,
84*94c4a1e1SFrank Piva 		u32 len)
85*94c4a1e1SFrank Piva {
86*94c4a1e1SFrank Piva 	int fd = qs.img.fd;
87*94c4a1e1SFrank Piva 	int ret;
88*94c4a1e1SFrank Piva 
89*94c4a1e1SFrank Piva 	if (!(flags & QCOW2_META_DIRTY))
90*94c4a1e1SFrank Piva 		return 0;
91*94c4a1e1SFrank Piva 
92*94c4a1e1SFrank Piva 	if (!(flags & QCOW2_META_UPDATE))
93*94c4a1e1SFrank Piva 		ublk_err( "%s %s: buf isn't update\n", __func__,
94*94c4a1e1SFrank Piva 				typeid(*this).name());
95*94c4a1e1SFrank Piva 
96*94c4a1e1SFrank Piva 	//qcow2_log("%s: write %s offset %llx len %lu  \n", __func__,
97*94c4a1e1SFrank Piva 	//		typeid(*this).name(), offset, buf_sz);
98*94c4a1e1SFrank Piva 	lseek(fd, off, SEEK_SET);
99*94c4a1e1SFrank Piva 	ret = write(fd, addr, len);
100*94c4a1e1SFrank Piva 	if (len != ret)
101*94c4a1e1SFrank Piva 		qcow2_log("%s: write %u(%u)\n", __func__, len, ret);
102*94c4a1e1SFrank Piva 	if (ret > 0)
103*94c4a1e1SFrank Piva 		flags &= ~QCOW2_META_DIRTY;
104*94c4a1e1SFrank Piva 
105*94c4a1e1SFrank Piva 	return len;
106*94c4a1e1SFrank Piva }
107*94c4a1e1SFrank Piva 
zero_buf()108*94c4a1e1SFrank Piva void Qcow2Meta::zero_buf() {
109*94c4a1e1SFrank Piva 	memset((void *)addr, 0, buf_sz);
110*94c4a1e1SFrank Piva }
111*94c4a1e1SFrank Piva 
112*94c4a1e1SFrank Piva // Base class is constructed first, then follows member class/objects,
113*94c4a1e1SFrank Piva // and member classes are done in the order of their declaration,
114*94c4a1e1SFrank Piva // so here __a can be setup correctly.
Qcow2HeaderExtFeatureNameTable(char * addr,u64 offset)115*94c4a1e1SFrank Piva Qcow2HeaderExtFeatureNameTable::Qcow2HeaderExtFeatureNameTable(
116*94c4a1e1SFrank Piva 		char *addr, u64 offset): Qcow2HeaderExt(addr, offset),
117*94c4a1e1SFrank Piva 	__a(len / sizeof(struct feature_entry))
118*94c4a1e1SFrank Piva {
119*94c4a1e1SFrank Piva 	unsigned off = offset;
120*94c4a1e1SFrank Piva 
121*94c4a1e1SFrank Piva 	for (int i = 0; i < __a.size(); i++) {
122*94c4a1e1SFrank Piva 		__a[i].feature_type = *(addr + off + 8);
123*94c4a1e1SFrank Piva 		__a[i].bit_num = *(addr + off + 9);
124*94c4a1e1SFrank Piva 		strncpy(__a[i].feature_name, addr + off + 10, 46);
125*94c4a1e1SFrank Piva 		off += 48;
126*94c4a1e1SFrank Piva 	}
127*94c4a1e1SFrank Piva }
128*94c4a1e1SFrank Piva 
dump() const129*94c4a1e1SFrank Piva void Qcow2HeaderExtFeatureNameTable::dump() const
130*94c4a1e1SFrank Piva {
131*94c4a1e1SFrank Piva 	Qcow2HeaderExt::dump();
132*94c4a1e1SFrank Piva 
133*94c4a1e1SFrank Piva 	for (int i = 0; i < __a.size(); i++)
134*94c4a1e1SFrank Piva 		qcow2_log("\t %d: type %x bit_num %u name %s\n",
135*94c4a1e1SFrank Piva 			i, __a[i].feature_type, __a[i].bit_num,
136*94c4a1e1SFrank Piva 			__a[i].feature_name);
137*94c4a1e1SFrank Piva }
138*94c4a1e1SFrank Piva 
Qcow2Header(Qcow2State & state)139*94c4a1e1SFrank Piva Qcow2Header::Qcow2Header(Qcow2State &state): Qcow2Meta(*this, 0, 4096,
140*94c4a1e1SFrank Piva 	typeid(this).name(), 0), magic(0), version(0), cluster_bits(0),
141*94c4a1e1SFrank Piva 	refcount_order(0), qs(state)
142*94c4a1e1SFrank Piva {
143*94c4a1e1SFrank Piva 	backingfile_format_name = NULL;
144*94c4a1e1SFrank Piva 	feature_name_table = NULL;
145*94c4a1e1SFrank Piva 	enc_header_pointer = NULL;
146*94c4a1e1SFrank Piva 	bitmaps = NULL;
147*94c4a1e1SFrank Piva 	ext_data_file_name = NULL;
148*94c4a1e1SFrank Piva 
149*94c4a1e1SFrank Piva 	load(state, 0, buf_sz, true);
150*94c4a1e1SFrank Piva }
151*94c4a1e1SFrank Piva 
flush(Qcow2State & qs,const qcow2_io_ctx_t & ioc,u64 off,u32 len)152*94c4a1e1SFrank Piva int Qcow2Header::flush(Qcow2State &qs, const qcow2_io_ctx_t &ioc, u64 off,
153*94c4a1e1SFrank Piva 			u32 len)
154*94c4a1e1SFrank Piva {
155*94c4a1e1SFrank Piva 	return Qcow2Meta::flush(qs, ioc, off, len);
156*94c4a1e1SFrank Piva }
157*94c4a1e1SFrank Piva 
~Qcow2Header()158*94c4a1e1SFrank Piva Qcow2Header::~Qcow2Header()
159*94c4a1e1SFrank Piva {
160*94c4a1e1SFrank Piva 	delete	backingfile_format_name;
161*94c4a1e1SFrank Piva 	delete	feature_name_table;
162*94c4a1e1SFrank Piva 	delete	enc_header_pointer;
163*94c4a1e1SFrank Piva 	delete	bitmaps;
164*94c4a1e1SFrank Piva 	delete	ext_data_file_name;
165*94c4a1e1SFrank Piva }
166*94c4a1e1SFrank Piva 
dump_ext() const167*94c4a1e1SFrank Piva void Qcow2Header::dump_ext() const
168*94c4a1e1SFrank Piva {
169*94c4a1e1SFrank Piva 	if (backingfile_format_name)
170*94c4a1e1SFrank Piva 		backingfile_format_name->dump();
171*94c4a1e1SFrank Piva 
172*94c4a1e1SFrank Piva 	if (ext_data_file_name)
173*94c4a1e1SFrank Piva 		ext_data_file_name->dump();
174*94c4a1e1SFrank Piva 
175*94c4a1e1SFrank Piva 	if (feature_name_table)
176*94c4a1e1SFrank Piva 		feature_name_table->dump();
177*94c4a1e1SFrank Piva 
178*94c4a1e1SFrank Piva 	if (bitmaps)
179*94c4a1e1SFrank Piva 		bitmaps->dump();
180*94c4a1e1SFrank Piva 
181*94c4a1e1SFrank Piva 	if (enc_header_pointer)
182*94c4a1e1SFrank Piva 		enc_header_pointer->dump();
183*94c4a1e1SFrank Piva }
184*94c4a1e1SFrank Piva 
185*94c4a1e1SFrank Piva /*
186*94c4a1e1SFrank Piva  * populate header extensions
187*94c4a1e1SFrank Piva  *
188*94c4a1e1SFrank Piva  * The header may take more than 4k, which should be decided by
189*94c4a1e1SFrank Piva  * backing_file_offset & backing_file_size __or__ populate
190*94c4a1e1SFrank Piva  * header extensions.
191*94c4a1e1SFrank Piva  */
populate()192*94c4a1e1SFrank Piva int Qcow2Header::populate()
193*94c4a1e1SFrank Piva {
194*94c4a1e1SFrank Piva 	char *buf = (char *)addr;
195*94c4a1e1SFrank Piva 	u64 start = (get_header_length() + 7) & ~0x7ULL;
196*94c4a1e1SFrank Piva 	u32 *p_magic =  const_cast<u32 *> (&magic);
197*94c4a1e1SFrank Piva 	u32 *p_version =  const_cast<u32 *> (&version);
198*94c4a1e1SFrank Piva 	u32 *p_cluster_bits = const_cast<u32 *> (&cluster_bits);
199*94c4a1e1SFrank Piva 	u32 *p_refcount_order = const_cast<u32 *> (&refcount_order);
200*94c4a1e1SFrank Piva 
201*94c4a1e1SFrank Piva 	*p_magic = get_magic();
202*94c4a1e1SFrank Piva 	*p_version = get_version();
203*94c4a1e1SFrank Piva 	*p_cluster_bits = get_cluster_bits();
204*94c4a1e1SFrank Piva 	*p_refcount_order = get_refcount_order();
205*94c4a1e1SFrank Piva 
206*94c4a1e1SFrank Piva 	if (version == 2)
207*94c4a1e1SFrank Piva 		goto exit;
208*94c4a1e1SFrank Piva 
209*94c4a1e1SFrank Piva 	//todo: populate extensions
210*94c4a1e1SFrank Piva 	while (true) {
211*94c4a1e1SFrank Piva 		Qcow2HeaderExt ext(buf, start);
212*94c4a1e1SFrank Piva 
213*94c4a1e1SFrank Piva 		switch (ext.type) {
214*94c4a1e1SFrank Piva 		case QCOW2_EXT_MAGIC_END:
215*94c4a1e1SFrank Piva 			goto exit;
216*94c4a1e1SFrank Piva 		case QCOW2_EXT_MAGIC_BACKING_FORMAT:
217*94c4a1e1SFrank Piva 			this->backingfile_format_name =
218*94c4a1e1SFrank Piva 				new Qcow2HeaderExtString(buf, start);
219*94c4a1e1SFrank Piva 			break;
220*94c4a1e1SFrank Piva 		case QCOW2_EXT_MAGIC_FEATURE_TABLE:
221*94c4a1e1SFrank Piva 			this->feature_name_table =
222*94c4a1e1SFrank Piva 				new Qcow2HeaderExtFeatureNameTable(
223*94c4a1e1SFrank Piva 						buf, start);
224*94c4a1e1SFrank Piva 			break;
225*94c4a1e1SFrank Piva 		case QCOW2_EXT_MAGIC_CRYPTO_HEADER:
226*94c4a1e1SFrank Piva 			this->enc_header_pointer =
227*94c4a1e1SFrank Piva 				new Qcow2HeaderExtEncHeader(buf, start);
228*94c4a1e1SFrank Piva 			break;
229*94c4a1e1SFrank Piva 		case QCOW2_EXT_MAGIC_BITMAPS:
230*94c4a1e1SFrank Piva 			this->bitmaps =
231*94c4a1e1SFrank Piva 				new Qcow2HeaderExtBitmaps(buf, start);
232*94c4a1e1SFrank Piva 			break;
233*94c4a1e1SFrank Piva 		case QCOW2_EXT_MAGIC_DATA_FILE:
234*94c4a1e1SFrank Piva 			this->ext_data_file_name =
235*94c4a1e1SFrank Piva 				new Qcow2HeaderExtString(buf, start);
236*94c4a1e1SFrank Piva 			break;
237*94c4a1e1SFrank Piva 		};
238*94c4a1e1SFrank Piva 		start += 8 + (ext.len + 7) & ~0x7ULL;
239*94c4a1e1SFrank Piva 	}
240*94c4a1e1SFrank Piva  exit:
241*94c4a1e1SFrank Piva 	return 0;
242*94c4a1e1SFrank Piva }
243*94c4a1e1SFrank Piva 
load(Qcow2State & qs,const qcow2_io_ctx_t & ioc,u32 len,bool sync)244*94c4a1e1SFrank Piva int Qcow2Header::load(Qcow2State &qs, const qcow2_io_ctx_t &ioc, u32 len, bool sync)
245*94c4a1e1SFrank Piva {
246*94c4a1e1SFrank Piva 	int ret;
247*94c4a1e1SFrank Piva 
248*94c4a1e1SFrank Piva 	ret = Qcow2Meta::load(qs, ioc, len, sync);
249*94c4a1e1SFrank Piva 	if (ret <= 0)
250*94c4a1e1SFrank Piva 		goto fail;
251*94c4a1e1SFrank Piva 
252*94c4a1e1SFrank Piva 	ret = populate();
253*94c4a1e1SFrank Piva 	return ret;
254*94c4a1e1SFrank Piva  fail:
255*94c4a1e1SFrank Piva 	ublk_err( "%s: load failed %d", __func__, ret);
256*94c4a1e1SFrank Piva 	return ret;
257*94c4a1e1SFrank Piva }
258*94c4a1e1SFrank Piva 
operator <<(std::ostream & os,const Qcow2Header & h)259*94c4a1e1SFrank Piva std::ostream & operator<<(std::ostream &os, const Qcow2Header &h)
260*94c4a1e1SFrank Piva {
261*94c4a1e1SFrank Piva 	char buf[256];
262*94c4a1e1SFrank Piva 
263*94c4a1e1SFrank Piva 	sprintf(buf, "magic: %x", h.magic);
264*94c4a1e1SFrank Piva 	std::cout << std::string(buf) << std::endl;
265*94c4a1e1SFrank Piva 	qcow2_log("%s", buf);
266*94c4a1e1SFrank Piva 
267*94c4a1e1SFrank Piva 	sprintf(buf, "version: %x\n", h.version);
268*94c4a1e1SFrank Piva 	std::cout << std::string(buf) << std::endl;
269*94c4a1e1SFrank Piva 	qcow2_log("%s", buf);
270*94c4a1e1SFrank Piva 
271*94c4a1e1SFrank Piva 	sprintf(buf, "cluster_bits: %x\n", h.cluster_bits);
272*94c4a1e1SFrank Piva 	std::cout << std::string(buf) << std::endl;
273*94c4a1e1SFrank Piva 	qcow2_log("%s", buf);
274*94c4a1e1SFrank Piva 
275*94c4a1e1SFrank Piva 	sprintf(buf, "refcount_order: %x\n", h.refcount_order);
276*94c4a1e1SFrank Piva 	std::cout << std::string(buf) << std::endl;
277*94c4a1e1SFrank Piva 	qcow2_log("%s", buf);
278*94c4a1e1SFrank Piva 
279*94c4a1e1SFrank Piva 	return os;
280*94c4a1e1SFrank Piva }
281*94c4a1e1SFrank Piva 
Qcow2MappingMeta(Qcow2State & qs,u64 off,u32 buf_sz,const char * cls_name,u32 f)282*94c4a1e1SFrank Piva Qcow2MappingMeta::Qcow2MappingMeta(Qcow2State &qs, u64 off, u32 buf_sz,
283*94c4a1e1SFrank Piva 		const char *cls_name, u32 f):
284*94c4a1e1SFrank Piva 	Qcow2Meta(qs.header, off, buf_sz, cls_name, f)
285*94c4a1e1SFrank Piva {
286*94c4a1e1SFrank Piva 	//default each entry is 64bits(8bytes) except for:
287*94c4a1e1SFrank Piva 	// extended l2 entry is 128bit, refcount blk has refcount_order
288*94c4a1e1SFrank Piva 	entry_bits_order = 6;
289*94c4a1e1SFrank Piva 	next_free_idx = -1;
290*94c4a1e1SFrank Piva }
291*94c4a1e1SFrank Piva 
292*94c4a1e1SFrank Piva /*
293*94c4a1e1SFrank Piva  * __flush() is just one worker, state check/update is done before calling
294*94c4a1e1SFrank Piva  * __flush()
295*94c4a1e1SFrank Piva  */
__flush(Qcow2State & qs,const qcow2_io_ctx_t & ioc,u64 off,u32 len,bool run_fsync)296*94c4a1e1SFrank Piva int Qcow2MappingMeta::__flush(Qcow2State &qs, const qcow2_io_ctx_t &ioc,
297*94c4a1e1SFrank Piva 		u64 off, u32 len, bool run_fsync)
298*94c4a1e1SFrank Piva {
299*94c4a1e1SFrank Piva 	int fd = qs.img.fd;
300*94c4a1e1SFrank Piva 	u32 qid = ioc.get_qid();
301*94c4a1e1SFrank Piva 	u32 tag = ioc.get_tag();
302*94c4a1e1SFrank Piva 	const struct ublksrv_queue *q = ublksrv_get_queue(qs.dev, qid);
303*94c4a1e1SFrank Piva 	struct io_uring_sqe *sqe, *sqe2;
304*94c4a1e1SFrank Piva 	unsigned mio_id;
305*94c4a1e1SFrank Piva 
306*94c4a1e1SFrank Piva 	qcow2_assert(flags & QCOW2_META_DIRTY);
307*94c4a1e1SFrank Piva 
308*94c4a1e1SFrank Piva 	if (!(flags & QCOW2_META_UPDATE))
309*94c4a1e1SFrank Piva 		ublk_err( "%s %s: buf isn't update\n", __func__,
310*94c4a1e1SFrank Piva 				typeid(*this).name());
311*94c4a1e1SFrank Piva 
312*94c4a1e1SFrank Piva 	if (off < offset || off >= offset + buf_sz) {
313*94c4a1e1SFrank Piva 		ublk_err( "%s %s: offset %" PRIx64 " is wrong\n", __func__,
314*94c4a1e1SFrank Piva 				typeid(*this).name(), offset);
315*94c4a1e1SFrank Piva 		return -EINVAL;
316*94c4a1e1SFrank Piva 	}
317*94c4a1e1SFrank Piva 
318*94c4a1e1SFrank Piva 	if (len > offset + buf_sz - off) {
319*94c4a1e1SFrank Piva 		ublk_err( "%s %s: len %x is wrong\n", __func__,
320*94c4a1e1SFrank Piva 				typeid(*this).name(), len);
321*94c4a1e1SFrank Piva 		return -EINVAL;
322*94c4a1e1SFrank Piva 	}
323*94c4a1e1SFrank Piva 
324*94c4a1e1SFrank Piva 	sqe = io_uring_get_sqe(q->ring_ptr);
325*94c4a1e1SFrank Piva 	if (!sqe) {
326*94c4a1e1SFrank Piva 		ublk_err( "%s %s: not get sqe allocated",
327*94c4a1e1SFrank Piva 				__func__, typeid(*this).name());
328*94c4a1e1SFrank Piva 		return -ENOMEM;
329*94c4a1e1SFrank Piva 	}
330*94c4a1e1SFrank Piva 
331*94c4a1e1SFrank Piva 	if (run_fsync) {
332*94c4a1e1SFrank Piva 		sqe2 = io_uring_get_sqe(q->ring_ptr);
333*94c4a1e1SFrank Piva 		if (!sqe2) {
334*94c4a1e1SFrank Piva 			ublk_err( "%s %s: not get sqe2 allocated",
335*94c4a1e1SFrank Piva 				__func__, typeid(*this).name());
336*94c4a1e1SFrank Piva 			return -ENOMEM;
337*94c4a1e1SFrank Piva 		}
338*94c4a1e1SFrank Piva 		io_uring_prep_fsync(sqe2, fd, IORING_FSYNC_DATASYNC);
339*94c4a1e1SFrank Piva 		sqe2->user_data = build_user_data(0xffff, IORING_OP_FSYNC, 0, 1);
340*94c4a1e1SFrank Piva 		sqe2->flags |= IOSQE_IO_LINK;
341*94c4a1e1SFrank Piva 	}
342*94c4a1e1SFrank Piva 
343*94c4a1e1SFrank Piva 	mio_id = qs.add_meta_io(qid, this);
344*94c4a1e1SFrank Piva 
345*94c4a1e1SFrank Piva 	io_uring_prep_write(sqe, fd, (void *)((u64)addr + (off - offset)),
346*94c4a1e1SFrank Piva 			len, off);
347*94c4a1e1SFrank Piva 	sqe->user_data = build_user_data(tag, IORING_OP_WRITE, mio_id + 1, 1);
348*94c4a1e1SFrank Piva 	ublk_dbg(UBLK_DBG_QCOW2_META, "%s %s: flushing %p tag %d off %lx sz %d flags %x refcnt %d\n",
349*94c4a1e1SFrank Piva 			__func__, typeid(*this).name(), this, tag, off,
350*94c4a1e1SFrank Piva 			len, flags, read_ref());
351*94c4a1e1SFrank Piva 	return 1;
352*94c4a1e1SFrank Piva }
353*94c4a1e1SFrank Piva 
io_done(Qcow2State & qs,const struct ublksrv_queue * q,const struct io_uring_cqe * cqe)354*94c4a1e1SFrank Piva void Qcow2MappingMeta::io_done(Qcow2State &qs, const struct ublksrv_queue *q,
355*94c4a1e1SFrank Piva 			const struct io_uring_cqe *cqe)
356*94c4a1e1SFrank Piva {
357*94c4a1e1SFrank Piva 	u32 tag = user_data_to_tag(cqe->user_data);
358*94c4a1e1SFrank Piva 	u32 meta_id = user_data_to_tgt_data(cqe->user_data) - 1;
359*94c4a1e1SFrank Piva 	u32 op = user_data_to_op(cqe->user_data);
360*94c4a1e1SFrank Piva 
361*94c4a1e1SFrank Piva 	qs.del_meta_io(q->q_id, meta_id);
362*94c4a1e1SFrank Piva 
363*94c4a1e1SFrank Piva 	//zero my cluster needn't to wakeup events on me
364*94c4a1e1SFrank Piva 	if (op != IORING_OP_FALLOCATE)
365*94c4a1e1SFrank Piva 		wakeup_all(q, tag);
366*94c4a1e1SFrank Piva }
367*94c4a1e1SFrank Piva 
Qcow2TopTable(Qcow2State & qs,u64 off,u32 buf_sz,const char * cls_name,u32 f)368*94c4a1e1SFrank Piva Qcow2TopTable::Qcow2TopTable(Qcow2State &qs, u64 off, u32 buf_sz,
369*94c4a1e1SFrank Piva 		const char *cls_name, u32 f):
370*94c4a1e1SFrank Piva 	Qcow2MappingMeta(qs, off, buf_sz, cls_name, f),
371*94c4a1e1SFrank Piva 	min_bs_bits(qs.min_bs_bits),
372*94c4a1e1SFrank Piva 	dirty(qs.get_l1_table_max_size() >> qs.min_bs_bits)
373*94c4a1e1SFrank Piva {
374*94c4a1e1SFrank Piva 	ublk_dbg(UBLK_DBG_QCOW2_META_L1, "%s: %s dirty size %zd %u/%u\n",
375*94c4a1e1SFrank Piva 			__func__,
376*94c4a1e1SFrank Piva 			cls_name, dirty.size(),
377*94c4a1e1SFrank Piva 		qs.get_l1_table_max_size(),qs.min_bs_bits);
378*94c4a1e1SFrank Piva 	for (int i = 0; i < dirty.size(); i++)
379*94c4a1e1SFrank Piva 		dirty[i] = false;
380*94c4a1e1SFrank Piva }
381*94c4a1e1SFrank Piva 
prep_flush(const qcow2_io_ctx_t & ioc,u32 blk_idx)382*94c4a1e1SFrank Piva bool Qcow2TopTable::prep_flush(const qcow2_io_ctx_t &ioc, u32 blk_idx)
383*94c4a1e1SFrank Piva {
384*94c4a1e1SFrank Piva 	if (!(flags & QCOW2_META_DIRTY))
385*94c4a1e1SFrank Piva 		return false;
386*94c4a1e1SFrank Piva 
387*94c4a1e1SFrank Piva 	//so far, just allow one in-progress unit for l1/refcount table
388*94c4a1e1SFrank Piva 	if (flags & QCOW2_META_FLUSHING)
389*94c4a1e1SFrank Piva 		return false;
390*94c4a1e1SFrank Piva 
391*94c4a1e1SFrank Piva 	flags |= QCOW2_META_FLUSHING;
392*94c4a1e1SFrank Piva 	return true;
393*94c4a1e1SFrank Piva }
394*94c4a1e1SFrank Piva 
unprep_flush(u32 blk_idx)395*94c4a1e1SFrank Piva void Qcow2TopTable::unprep_flush(u32 blk_idx) {
396*94c4a1e1SFrank Piva 	flags &= ~QCOW2_META_FLUSHING;
397*94c4a1e1SFrank Piva }
398*94c4a1e1SFrank Piva 
io_done(Qcow2State & qs,const struct ublksrv_queue * q,const struct io_uring_cqe * cqe)399*94c4a1e1SFrank Piva void Qcow2TopTable::io_done(Qcow2State &qs, const struct ublksrv_queue *q,
400*94c4a1e1SFrank Piva 			const struct io_uring_cqe *cqe)
401*94c4a1e1SFrank Piva {
402*94c4a1e1SFrank Piva 	u32 op = user_data_to_op(cqe->user_data);
403*94c4a1e1SFrank Piva 
404*94c4a1e1SFrank Piva 	//only for write l1 or refcount table
405*94c4a1e1SFrank Piva 	qcow2_assert(op == IORING_OP_WRITE);
406*94c4a1e1SFrank Piva 
407*94c4a1e1SFrank Piva 	unprep_flush(get_flush_blk_idx());
408*94c4a1e1SFrank Piva 
409*94c4a1e1SFrank Piva 	if (cqe->res < 0)
410*94c4a1e1SFrank Piva 		return;
411*94c4a1e1SFrank Piva 
412*94c4a1e1SFrank Piva 	set_blk_dirty(get_flush_blk_idx(), false);
413*94c4a1e1SFrank Piva 
414*94c4a1e1SFrank Piva 	Qcow2MappingMeta::io_done(qs, q, cqe);
415*94c4a1e1SFrank Piva }
416*94c4a1e1SFrank Piva 
flush(Qcow2State & qs,const qcow2_io_ctx_t & ioc,u64 off,u32 len)417*94c4a1e1SFrank Piva int Qcow2TopTable::flush(Qcow2State &qs, const qcow2_io_ctx_t &ioc,
418*94c4a1e1SFrank Piva 		u64 off, u32 len)
419*94c4a1e1SFrank Piva {
420*94c4a1e1SFrank Piva 	int blk_idx = (off - offset) >> min_bs_bits;
421*94c4a1e1SFrank Piva 	int ret;
422*94c4a1e1SFrank Piva 
423*94c4a1e1SFrank Piva 	qcow2_assert(len == 512 && blk_idx < dirty.size());
424*94c4a1e1SFrank Piva 
425*94c4a1e1SFrank Piva 	if (!prep_flush(ioc, blk_idx))
426*94c4a1e1SFrank Piva 		return 0;
427*94c4a1e1SFrank Piva 
428*94c4a1e1SFrank Piva 	if (!get_blk_dirty(blk_idx)) {
429*94c4a1e1SFrank Piva 		ret = 0;
430*94c4a1e1SFrank Piva 		goto exit;
431*94c4a1e1SFrank Piva 	}
432*94c4a1e1SFrank Piva 
433*94c4a1e1SFrank Piva 	set_flush_blk_idx(blk_idx);
434*94c4a1e1SFrank Piva 
435*94c4a1e1SFrank Piva 	//need to run fsync before writting l1/refcount table, so
436*94c4a1e1SFrank Piva 	//that write order between top and l2/refcount blk is respected
437*94c4a1e1SFrank Piva 	ret = Qcow2MappingMeta::__flush(qs, ioc, off, len, true);
438*94c4a1e1SFrank Piva exit:
439*94c4a1e1SFrank Piva 	if (ret <= 0)
440*94c4a1e1SFrank Piva 		unprep_flush(blk_idx);
441*94c4a1e1SFrank Piva 	return ret;
442*94c4a1e1SFrank Piva }
443*94c4a1e1SFrank Piva 
has_dirty_slices(Qcow2State & qs,int idx)444*94c4a1e1SFrank Piva bool Qcow2TopTable::has_dirty_slices(Qcow2State &qs, int idx)
445*94c4a1e1SFrank Piva {
446*94c4a1e1SFrank Piva 	u64 entry = get_entry(idx);
447*94c4a1e1SFrank Piva 	u64 start, end, step, offset;
448*94c4a1e1SFrank Piva 
449*94c4a1e1SFrank Piva 	if (!entry)
450*94c4a1e1SFrank Piva 		return false;
451*94c4a1e1SFrank Piva 
452*94c4a1e1SFrank Piva 	if (is_mapping_meta())
453*94c4a1e1SFrank Piva 		step = 1ULL << (QCOW2_PARA::L2_TABLE_SLICE_BITS - 3 +
454*94c4a1e1SFrank Piva 				qs.header.cluster_bits);
455*94c4a1e1SFrank Piva 	else
456*94c4a1e1SFrank Piva 		step = 1ULL << (QCOW2_PARA::REFCOUNT_BLK_SLICE_BITS - 3 +
457*94c4a1e1SFrank Piva 				qs.header.cluster_bits);
458*94c4a1e1SFrank Piva 
459*94c4a1e1SFrank Piva 	start = ((u64)idx) << single_entry_order();
460*94c4a1e1SFrank Piva 	end = start + (1ULL << single_entry_order());
461*94c4a1e1SFrank Piva 	for (offset = start; offset < end; offset += step) {
462*94c4a1e1SFrank Piva 		Qcow2SliceMeta *t;
463*94c4a1e1SFrank Piva 
464*94c4a1e1SFrank Piva 		if (is_mapping_meta())
465*94c4a1e1SFrank Piva 			t = qs.cluster_map.__find_slice(offset);
466*94c4a1e1SFrank Piva 		else
467*94c4a1e1SFrank Piva 			t = qs.cluster_allocator.__find_slice(offset);
468*94c4a1e1SFrank Piva 
469*94c4a1e1SFrank Piva 		if (t && t->get_dirty(-1))
470*94c4a1e1SFrank Piva 			return true;
471*94c4a1e1SFrank Piva 	}
472*94c4a1e1SFrank Piva 
473*94c4a1e1SFrank Piva 	return false;
474*94c4a1e1SFrank Piva }
475*94c4a1e1SFrank Piva 
Qcow2L1Table(Qcow2State & qs)476*94c4a1e1SFrank Piva Qcow2L1Table::Qcow2L1Table(Qcow2State &qs): Qcow2TopTable(qs,
477*94c4a1e1SFrank Piva 		qs.get_l1_table_offset(), qs.get_l1_table_max_size(),
478*94c4a1e1SFrank Piva 		typeid(*this).name(), QCOW2_META_TOP | QCOW2_META_MAPPING)
479*94c4a1e1SFrank Piva {
480*94c4a1e1SFrank Piva }
481*94c4a1e1SFrank Piva 
load(Qcow2State & qs,const qcow2_io_ctx_t & ioc,u32 len,bool sync)482*94c4a1e1SFrank Piva int Qcow2L1Table::load(Qcow2State &qs, const qcow2_io_ctx_t &ioc, u32 len, bool sync)
483*94c4a1e1SFrank Piva {
484*94c4a1e1SFrank Piva 	int ret;
485*94c4a1e1SFrank Piva 
486*94c4a1e1SFrank Piva 	ret = Qcow2Meta::load(qs, ioc, len, sync);
487*94c4a1e1SFrank Piva 	if (ret < 0)
488*94c4a1e1SFrank Piva 		ublk_err( "%s %s: load failed %d", __func__,
489*94c4a1e1SFrank Piva 				typeid(*this).name(), ret);
490*94c4a1e1SFrank Piva 	return ret;
491*94c4a1e1SFrank Piva }
492*94c4a1e1SFrank Piva 
dump()493*94c4a1e1SFrank Piva void Qcow2L1Table::dump()
494*94c4a1e1SFrank Piva {
495*94c4a1e1SFrank Piva 	qcow2_log("%s %s: sizeof %zd\n", __func__, typeid(*this).name(),
496*94c4a1e1SFrank Piva 			sizeof(*this));
497*94c4a1e1SFrank Piva 	for (int i = 0; i < header.get_l1_size(); i++)
498*94c4a1e1SFrank Piva 		qcow2_log("%d: %lx\n", i, get_entry(i));
499*94c4a1e1SFrank Piva }
500*94c4a1e1SFrank Piva 
get_entry(u32 idx)501*94c4a1e1SFrank Piva u64  Qcow2L1Table::get_entry(u32 idx) {
502*94c4a1e1SFrank Piva 	return get_entry_fast(idx);
503*94c4a1e1SFrank Piva }
504*94c4a1e1SFrank Piva 
set_entry(u32 idx,u64 val)505*94c4a1e1SFrank Piva void Qcow2L1Table::set_entry(u32 idx, u64 val) {
506*94c4a1e1SFrank Piva 	set_entry_fast(idx, val);
507*94c4a1e1SFrank Piva }
508*94c4a1e1SFrank Piva 
Qcow2RefcountTable(Qcow2State & qs)509*94c4a1e1SFrank Piva Qcow2RefcountTable::Qcow2RefcountTable(Qcow2State &qs):
510*94c4a1e1SFrank Piva 	Qcow2TopTable(qs, qs.get_refcount_table_offset(),
511*94c4a1e1SFrank Piva 		qs.get_refcount_table_max_size(),
512*94c4a1e1SFrank Piva 		typeid(*this).name(), QCOW2_META_TOP)
513*94c4a1e1SFrank Piva {
514*94c4a1e1SFrank Piva }
515*94c4a1e1SFrank Piva 
load(Qcow2State & qs,const qcow2_io_ctx_t & ioc,u32 len,bool sync)516*94c4a1e1SFrank Piva int Qcow2RefcountTable::load(Qcow2State &qs, const qcow2_io_ctx_t &ioc,
517*94c4a1e1SFrank Piva 		u32 len, bool sync)
518*94c4a1e1SFrank Piva {
519*94c4a1e1SFrank Piva 	int ret;
520*94c4a1e1SFrank Piva 
521*94c4a1e1SFrank Piva 	ret = Qcow2Meta::load(qs, ioc, len, sync);
522*94c4a1e1SFrank Piva 	if (ret < 0)
523*94c4a1e1SFrank Piva 		ublk_err( "%s %s: load failed %d", __func__,
524*94c4a1e1SFrank Piva 				typeid(*this).name(), ret);
525*94c4a1e1SFrank Piva 	return ret;
526*94c4a1e1SFrank Piva }
527*94c4a1e1SFrank Piva 
get_entry(u32 idx)528*94c4a1e1SFrank Piva u64  Qcow2RefcountTable::get_entry(u32 idx) {
529*94c4a1e1SFrank Piva 	return get_entry_fast(idx);
530*94c4a1e1SFrank Piva }
531*94c4a1e1SFrank Piva 
set_entry(u32 idx,u64 val)532*94c4a1e1SFrank Piva void Qcow2RefcountTable::set_entry(u32 idx, u64 val) {
533*94c4a1e1SFrank Piva 	set_entry_fast(idx, val);
534*94c4a1e1SFrank Piva }
535*94c4a1e1SFrank Piva 
dump()536*94c4a1e1SFrank Piva void Qcow2RefcountTable::dump()
537*94c4a1e1SFrank Piva {
538*94c4a1e1SFrank Piva 	qcow2_log("%s %s: sizeof %zd\n", __func__, typeid(*this).name(),
539*94c4a1e1SFrank Piva 			sizeof(*this));
540*94c4a1e1SFrank Piva 	for (int i = 0; i < data_len / 8; i++) {
541*94c4a1e1SFrank Piva 		u64 entry = get_entry(i);
542*94c4a1e1SFrank Piva 
543*94c4a1e1SFrank Piva 		if (entry != 0)
544*94c4a1e1SFrank Piva 			qcow2_log("%d: %lx\n", i, entry);
545*94c4a1e1SFrank Piva 	}
546*94c4a1e1SFrank Piva }
547*94c4a1e1SFrank Piva 
Qcow2SliceMeta(Qcow2State & qs,u64 off,u32 buf_sz,const char * cls_name,u32 p_idx,u32 f)548*94c4a1e1SFrank Piva Qcow2SliceMeta::Qcow2SliceMeta(Qcow2State &qs, u64 off, u32 buf_sz,
549*94c4a1e1SFrank Piva 		const char *cls_name, u32 p_idx, u32 f):
550*94c4a1e1SFrank Piva 	Qcow2MappingMeta(qs, off, buf_sz, cls_name, f),
551*94c4a1e1SFrank Piva 	parent_idx(p_idx)
552*94c4a1e1SFrank Piva {
553*94c4a1e1SFrank Piva #ifdef QCOW2_CACHE_DEBUG
554*94c4a1e1SFrank Piva         qcow2_log("slice meta %llx/%p/%d allocated\n", off, addr, buf_sz);
555*94c4a1e1SFrank Piva #endif
556*94c4a1e1SFrank Piva #ifdef DEBUG_QCOW2_META_VALIDATE
557*94c4a1e1SFrank Piva 	if (posix_memalign((void **)&validate_addr, getpagesize(), buf_sz))
558*94c4a1e1SFrank Piva 		ublk_err( "%s: allocate validate memory %d bytes failed\n",
559*94c4a1e1SFrank Piva 				__func__, buf_sz);
560*94c4a1e1SFrank Piva #endif
561*94c4a1e1SFrank Piva }
562*94c4a1e1SFrank Piva 
~Qcow2SliceMeta()563*94c4a1e1SFrank Piva Qcow2SliceMeta::~Qcow2SliceMeta() {
564*94c4a1e1SFrank Piva #ifdef DEBUG_QCOW2_META_VALIDATE
565*94c4a1e1SFrank Piva 	free(validate_addr);
566*94c4a1e1SFrank Piva #endif
567*94c4a1e1SFrank Piva }
568*94c4a1e1SFrank Piva 
prep_flush(const qcow2_io_ctx_t & ioc)569*94c4a1e1SFrank Piva bool Qcow2SliceMeta::prep_flush(const qcow2_io_ctx_t &ioc)
570*94c4a1e1SFrank Piva {
571*94c4a1e1SFrank Piva 	if (!(flags & QCOW2_META_DIRTY))
572*94c4a1e1SFrank Piva 		return false;
573*94c4a1e1SFrank Piva 
574*94c4a1e1SFrank Piva 	if (flags & QCOW2_META_FLUSHING) {
575*94c4a1e1SFrank Piva 		add_waiter(ioc.get_tag());
576*94c4a1e1SFrank Piva 		throw MetaUpdateException();
577*94c4a1e1SFrank Piva 	}
578*94c4a1e1SFrank Piva 	flags |= QCOW2_META_FLUSHING;
579*94c4a1e1SFrank Piva 	return true;
580*94c4a1e1SFrank Piva }
581*94c4a1e1SFrank Piva 
unprep_flush()582*94c4a1e1SFrank Piva void Qcow2SliceMeta::unprep_flush() {
583*94c4a1e1SFrank Piva 	flags &= ~QCOW2_META_FLUSHING;
584*94c4a1e1SFrank Piva }
585*94c4a1e1SFrank Piva 
zero_my_cluster(Qcow2State & qs,const qcow2_io_ctx_t & ioc)586*94c4a1e1SFrank Piva int Qcow2SliceMeta::zero_my_cluster(Qcow2State &qs,
587*94c4a1e1SFrank Piva 		const qcow2_io_ctx_t &ioc)
588*94c4a1e1SFrank Piva {
589*94c4a1e1SFrank Piva 	u64 cluster_off = offset & ~((1ULL << qs.header.cluster_bits) - 1);
590*94c4a1e1SFrank Piva 	Qcow2ClusterState *s = qs.cluster_allocator.get_cluster_state(
591*94c4a1e1SFrank Piva 			 cluster_off);
592*94c4a1e1SFrank Piva 	u32 qid = ioc.get_qid();
593*94c4a1e1SFrank Piva 	u32 tag = ioc.get_tag();
594*94c4a1e1SFrank Piva 	const struct ublksrv_queue *q = ublksrv_get_queue(qs.dev, qid);
595*94c4a1e1SFrank Piva 	int fd = q->dev->tgt.fds[1];
596*94c4a1e1SFrank Piva 	struct io_uring_sqe *sqe;
597*94c4a1e1SFrank Piva 	int mode = FALLOC_FL_ZERO_RANGE;
598*94c4a1e1SFrank Piva 	unsigned mio_id;
599*94c4a1e1SFrank Piva 
600*94c4a1e1SFrank Piva 	if (s == nullptr)
601*94c4a1e1SFrank Piva 		return 0;
602*94c4a1e1SFrank Piva 
603*94c4a1e1SFrank Piva 	if (s->get_state() >= QCOW2_ALLOC_ZEROED)
604*94c4a1e1SFrank Piva 		return 0;
605*94c4a1e1SFrank Piva 
606*94c4a1e1SFrank Piva 	if (s->get_state() == QCOW2_ALLOC_ZEROING) {
607*94c4a1e1SFrank Piva 		s->add_waiter(ioc.get_tag());
608*94c4a1e1SFrank Piva 		throw MetaUpdateException();
609*94c4a1e1SFrank Piva 	}
610*94c4a1e1SFrank Piva 
611*94c4a1e1SFrank Piva 	sqe = io_uring_get_sqe(q->ring_ptr);
612*94c4a1e1SFrank Piva 	if (!sqe) {
613*94c4a1e1SFrank Piva 		ublk_err("%s: tag %d offset %" PRIu64 "op %d, no sqe for zeroing\n",
614*94c4a1e1SFrank Piva 			__func__, tag, offset, IORING_OP_FALLOCATE);
615*94c4a1e1SFrank Piva 		return -ENOMEM;
616*94c4a1e1SFrank Piva 	}
617*94c4a1e1SFrank Piva 
618*94c4a1e1SFrank Piva 	get_ref();
619*94c4a1e1SFrank Piva 
620*94c4a1e1SFrank Piva 	mio_id = qs.add_meta_io(qid, this);
621*94c4a1e1SFrank Piva 	s->set_state(QCOW2_ALLOC_ZEROING);
622*94c4a1e1SFrank Piva 	io_uring_prep_fallocate(sqe, fd, mode, cluster_off,
623*94c4a1e1SFrank Piva 			(1ULL << qs.header.cluster_bits));
624*94c4a1e1SFrank Piva 	sqe->user_data = build_user_data(tag,
625*94c4a1e1SFrank Piva 			IORING_OP_FALLOCATE, mio_id + 1, 1);
626*94c4a1e1SFrank Piva 	ublk_dbg(UBLK_DBG_QCOW2_META, "%s %s: zeroing %p tag %d off %lx sz %d flags %x ref %d\n",
627*94c4a1e1SFrank Piva 			__func__, typeid(*this).name(), this, tag, cluster_off,
628*94c4a1e1SFrank Piva 			(1ULL << qs.header.cluster_bits), flags, refcnt);
629*94c4a1e1SFrank Piva 	return 1;
630*94c4a1e1SFrank Piva }
631*94c4a1e1SFrank Piva 
load(Qcow2State & qs,const qcow2_io_ctx_t & ioc,u32 len,bool sync)632*94c4a1e1SFrank Piva int Qcow2SliceMeta::load(Qcow2State &qs, const qcow2_io_ctx_t &ioc,
633*94c4a1e1SFrank Piva 		u32 len, bool sync)
634*94c4a1e1SFrank Piva {
635*94c4a1e1SFrank Piva 	int ret = -EINVAL;
636*94c4a1e1SFrank Piva 	u32 qid = ioc.get_qid();
637*94c4a1e1SFrank Piva 	u32 tag = ioc.get_tag();
638*94c4a1e1SFrank Piva 	const struct ublksrv_queue *q = ublksrv_get_queue(qs.dev, qid);
639*94c4a1e1SFrank Piva 	struct io_uring_sqe *sqe;
640*94c4a1e1SFrank Piva 	int mio_id;
641*94c4a1e1SFrank Piva 
642*94c4a1e1SFrank Piva 	if (sync) {
643*94c4a1e1SFrank Piva 		ublk_err( "%s %s: we only support async load",
644*94c4a1e1SFrank Piva 				__func__, typeid(*this).name());
645*94c4a1e1SFrank Piva 		return -EINVAL;
646*94c4a1e1SFrank Piva 	}
647*94c4a1e1SFrank Piva 
648*94c4a1e1SFrank Piva 	if (flags & QCOW2_META_UPDATE) {
649*94c4a1e1SFrank Piva 		ublk_err( "%s %s: we are update, need to load?",
650*94c4a1e1SFrank Piva 				__func__, typeid(*this).name());
651*94c4a1e1SFrank Piva 		return -EINVAL;
652*94c4a1e1SFrank Piva 	}
653*94c4a1e1SFrank Piva 
654*94c4a1e1SFrank Piva 	sqe = io_uring_get_sqe(q->ring_ptr);
655*94c4a1e1SFrank Piva 	if (!sqe) {
656*94c4a1e1SFrank Piva 		ublk_err( "%s %s: not get sqe allocated",
657*94c4a1e1SFrank Piva 				__func__, typeid(*this).name());
658*94c4a1e1SFrank Piva 		return ret;
659*94c4a1e1SFrank Piva 	}
660*94c4a1e1SFrank Piva 
661*94c4a1e1SFrank Piva 	get_ref();
662*94c4a1e1SFrank Piva 
663*94c4a1e1SFrank Piva 	mio_id = qs.add_meta_io(qid, this);
664*94c4a1e1SFrank Piva 
665*94c4a1e1SFrank Piva 	io_uring_prep_read(sqe, 1, (void *)addr, buf_sz, offset);
666*94c4a1e1SFrank Piva 	sqe->flags = IOSQE_FIXED_FILE;
667*94c4a1e1SFrank Piva 	/* meta io id starts from one and zero is reserved for plain ublk io */
668*94c4a1e1SFrank Piva 	sqe->user_data = build_user_data(tag, IORING_OP_READ, mio_id + 1, 1);
669*94c4a1e1SFrank Piva 
670*94c4a1e1SFrank Piva 	ublk_dbg(UBLK_DBG_QCOW2_META, "%s: queue io op %d(%llx %x %llx)"
671*94c4a1e1SFrank Piva 				" (qid %d tag %u, cmd_op %u target: %d tgt_data %d)\n",
672*94c4a1e1SFrank Piva 			__func__, sqe->opcode, sqe->off, sqe->len, sqe->addr,
673*94c4a1e1SFrank Piva 			q->q_id, tag, sqe->opcode, 1, mio_id + 1);
674*94c4a1e1SFrank Piva 	ublk_dbg(UBLK_DBG_QCOW2_META, "%s %s: loading %p tag %d off %lx sz %d flags %x ref %d\n",
675*94c4a1e1SFrank Piva 			__func__, typeid(*this).name(), this, tag,
676*94c4a1e1SFrank Piva 			offset, buf_sz, flags, refcnt);
677*94c4a1e1SFrank Piva 
678*94c4a1e1SFrank Piva 	return 0;
679*94c4a1e1SFrank Piva }
680*94c4a1e1SFrank Piva 
681*94c4a1e1SFrank Piva #ifdef DEBUG_QCOW2_META_VALIDATE
io_done_validate(Qcow2State & qs,const struct ublksrv_queue * q,struct io_uring_cqe * cqe)682*94c4a1e1SFrank Piva void Qcow2SliceMeta::io_done_validate(Qcow2State &qs, const struct ublksrv_queue *q,
683*94c4a1e1SFrank Piva 			struct io_uring_cqe *cqe)
684*94c4a1e1SFrank Piva {
685*94c4a1e1SFrank Piva 	u32 tag = user_data_to_tag(cqe->user_data);
686*94c4a1e1SFrank Piva 	u32 meta_id = user_data_to_tgt_data(cqe->user_data) - 1;
687*94c4a1e1SFrank Piva 	u32 op = user_data_to_op(cqe->user_data);
688*94c4a1e1SFrank Piva 	u64 cluster_off = offset & ~((1ULL << qs.header.cluster_bits) - 1);
689*94c4a1e1SFrank Piva 	bool res;
690*94c4a1e1SFrank Piva 
691*94c4a1e1SFrank Piva 	//for write, buffer data has been saved to validate_addr before
692*94c4a1e1SFrank Piva 	//submitting the WRITE io
693*94c4a1e1SFrank Piva 	if (op != IORING_OP_WRITE) {
694*94c4a1e1SFrank Piva 		lseek(qs.img.fd, offset, SEEK_SET);
695*94c4a1e1SFrank Piva 		read(qs.img.fd, validate_addr, buf_sz);
696*94c4a1e1SFrank Piva 	}
697*94c4a1e1SFrank Piva 
698*94c4a1e1SFrank Piva 	if (op == IORING_OP_FALLOCATE) {
699*94c4a1e1SFrank Piva 		for (int i = 0; i < buf_sz; i++) {
700*94c4a1e1SFrank Piva 			char *buf = (char *)validate_addr;
701*94c4a1e1SFrank Piva 
702*94c4a1e1SFrank Piva 			qcow2_assert(buf[i] == 0);
703*94c4a1e1SFrank Piva 		}
704*94c4a1e1SFrank Piva 	} else if (op == IORING_OP_WRITE || op == IORING_OP_READ) {
705*94c4a1e1SFrank Piva 		unsigned long *buf = (unsigned long *)addr;
706*94c4a1e1SFrank Piva 		unsigned long *buf2 = (unsigned long *)validate_addr;
707*94c4a1e1SFrank Piva 
708*94c4a1e1SFrank Piva 		res = bcmp(addr, validate_addr, buf_sz);
709*94c4a1e1SFrank Piva 
710*94c4a1e1SFrank Piva 		if (res == 0)
711*94c4a1e1SFrank Piva 			return;
712*94c4a1e1SFrank Piva 
713*94c4a1e1SFrank Piva 		for (int i = 0; i < buf_sz / 8; i++) {
714*94c4a1e1SFrank Piva 			if (buf[i] != buf2[i]) {
715*94c4a1e1SFrank Piva 				qcow2_log("%s: not same in %d %lx %lx\n",
716*94c4a1e1SFrank Piva 					__func__, i, buf[i], buf2[i]);
717*94c4a1e1SFrank Piva 				qcow2_log("%s: tag %d, tgt_data %d op %d meta (%p %x %lx %d) res %d\n",
718*94c4a1e1SFrank Piva 					__func__, tag, meta_id, op, this,
719*94c4a1e1SFrank Piva 					get_flags(), get_offset(),
720*94c4a1e1SFrank Piva 					refcnt, cqe->res);
721*94c4a1e1SFrank Piva 			}
722*94c4a1e1SFrank Piva 		}
723*94c4a1e1SFrank Piva 		qcow2_assert(0);
724*94c4a1e1SFrank Piva 	}
725*94c4a1e1SFrank Piva }
726*94c4a1e1SFrank Piva #endif
727*94c4a1e1SFrank Piva 
728*94c4a1e1SFrank Piva /* called for both load() and flush() */
io_done(Qcow2State & qs,const struct ublksrv_queue * q,const struct io_uring_cqe * cqe)729*94c4a1e1SFrank Piva void Qcow2SliceMeta::io_done(Qcow2State &qs, const struct ublksrv_queue *q,
730*94c4a1e1SFrank Piva 			const struct io_uring_cqe *cqe)
731*94c4a1e1SFrank Piva {
732*94c4a1e1SFrank Piva 	u32 tag = user_data_to_tag(cqe->user_data);
733*94c4a1e1SFrank Piva 	u32 meta_id = user_data_to_tgt_data(cqe->user_data) - 1;
734*94c4a1e1SFrank Piva 	u32 op = user_data_to_op(cqe->user_data);
735*94c4a1e1SFrank Piva 	u64 cluster_off = offset & ~((1ULL << qs.header.cluster_bits) - 1);
736*94c4a1e1SFrank Piva 
737*94c4a1e1SFrank Piva 	if (cqe->res < 0) {
738*94c4a1e1SFrank Piva 		qcow2_log("%s: failure: tag %d, tgt_data %d op %d meta (%p %x %lx %d) res %d\n",
739*94c4a1e1SFrank Piva 			__func__, tag, meta_id, op, this,
740*94c4a1e1SFrank Piva 			get_flags(), get_offset(), refcnt, cqe->res);
741*94c4a1e1SFrank Piva 		//zeroing the cluster for holding me is done
742*94c4a1e1SFrank Piva 		if (op == IORING_OP_FALLOCATE) {
743*94c4a1e1SFrank Piva 			if (qs.cluster_allocator.
744*94c4a1e1SFrank Piva 			    alloc_cluster_reset(cluster_off))
745*94c4a1e1SFrank Piva 				goto exit;
746*94c4a1e1SFrank Piva 		} else if (op == IORING_OP_WRITE) {
747*94c4a1e1SFrank Piva 			unprep_flush();
748*94c4a1e1SFrank Piva 			goto exit;
749*94c4a1e1SFrank Piva 		} else
750*94c4a1e1SFrank Piva 			goto exit;
751*94c4a1e1SFrank Piva 	}
752*94c4a1e1SFrank Piva 
753*94c4a1e1SFrank Piva 	io_done_validate(qs, q, cqe);
754*94c4a1e1SFrank Piva 
755*94c4a1e1SFrank Piva 	if (op == IORING_OP_READ)
756*94c4a1e1SFrank Piva 		set_update(true);
757*94c4a1e1SFrank Piva 	else if (op == IORING_OP_WRITE) {
758*94c4a1e1SFrank Piva 		unprep_flush();
759*94c4a1e1SFrank Piva 		qs.meta_flushing.dec_dirtied_slice(is_mapping_meta());
760*94c4a1e1SFrank Piva 		set_dirty(-1, false);
761*94c4a1e1SFrank Piva 		set_prep_flush(false);
762*94c4a1e1SFrank Piva 	} else if (op == IORING_OP_FALLOCATE)
763*94c4a1e1SFrank Piva 		qs.cluster_allocator.alloc_cluster_zeroed(q, tag, cluster_off);
764*94c4a1e1SFrank Piva 	else
765*94c4a1e1SFrank Piva 		ublk_err( "%s: unknown op: tag %d op %d meta_id %d res %d\n",
766*94c4a1e1SFrank Piva 			__func__, tag, op, meta_id, cqe->res);
767*94c4a1e1SFrank Piva 
768*94c4a1e1SFrank Piva 	ublk_dbg(UBLK_DBG_QCOW2_META, "%s: tag %d, tgt_data %d op %d meta (%p %x %lx %d) res %d\n",
769*94c4a1e1SFrank Piva 			__func__, tag, meta_id, op, this,
770*94c4a1e1SFrank Piva 			get_flags(), get_offset(), refcnt, cqe->res);
771*94c4a1e1SFrank Piva 
772*94c4a1e1SFrank Piva 	//wake up waiters
773*94c4a1e1SFrank Piva 	Qcow2MappingMeta::io_done(qs, q, cqe);
774*94c4a1e1SFrank Piva 
775*94c4a1e1SFrank Piva 	//if it is evicted, now it is ready to free it
776*94c4a1e1SFrank Piva 	if ((op == IORING_OP_WRITE) && cqe->res >= 0 && get_evicted())
777*94c4a1e1SFrank Piva 		qs.add_slice_to_free_list(this);
778*94c4a1e1SFrank Piva 
779*94c4a1e1SFrank Piva exit:
780*94c4a1e1SFrank Piva 	//drop the reference grabbed in either load() or flush()
781*94c4a1e1SFrank Piva 	put_ref();
782*94c4a1e1SFrank Piva 	return;
783*94c4a1e1SFrank Piva }
784*94c4a1e1SFrank Piva 
wait_clusters(Qcow2State & qs,const qcow2_io_ctx_t & ioc)785*94c4a1e1SFrank Piva void Qcow2SliceMeta::wait_clusters(Qcow2State &qs,
786*94c4a1e1SFrank Piva 		const qcow2_io_ctx_t &ioc)
787*94c4a1e1SFrank Piva {
788*94c4a1e1SFrank Piva 	for (int i = 0; i < get_nr_entries(); i++) {
789*94c4a1e1SFrank Piva 		u64 entry = get_entry(i);
790*94c4a1e1SFrank Piva 
791*94c4a1e1SFrank Piva 		if (entry) {
792*94c4a1e1SFrank Piva 			u64 cluster_off;
793*94c4a1e1SFrank Piva 
794*94c4a1e1SFrank Piva 			//mapping meta means this is one l2 table, otherwise
795*94c4a1e1SFrank Piva 			//it is one refcount block table
796*94c4a1e1SFrank Piva 			if (is_mapping_meta())
797*94c4a1e1SFrank Piva 				cluster_off = entry & L1E_OFFSET_MASK;
798*94c4a1e1SFrank Piva 			else
799*94c4a1e1SFrank Piva 				cluster_off = virt_offset() + (u64)i << qs.header.cluster_bits;
800*94c4a1e1SFrank Piva 
801*94c4a1e1SFrank Piva 			 Qcow2ClusterState *s = qs.cluster_allocator.
802*94c4a1e1SFrank Piva 				 get_cluster_state(cluster_off);
803*94c4a1e1SFrank Piva 
804*94c4a1e1SFrank Piva 			if (s == nullptr)
805*94c4a1e1SFrank Piva 				continue;
806*94c4a1e1SFrank Piva 
807*94c4a1e1SFrank Piva 			if (s->get_state() < QCOW2_ALLOC_ZEROED) {
808*94c4a1e1SFrank Piva 				s->add_waiter(ioc.get_tag());
809*94c4a1e1SFrank Piva 				throw MetaUpdateException();
810*94c4a1e1SFrank Piva 			}
811*94c4a1e1SFrank Piva 		}
812*94c4a1e1SFrank Piva 	}
813*94c4a1e1SFrank Piva }
814*94c4a1e1SFrank Piva 
reclaim_me()815*94c4a1e1SFrank Piva void Qcow2SliceMeta::reclaim_me()
816*94c4a1e1SFrank Piva {
817*94c4a1e1SFrank Piva 	unsigned queues = header.qs.dev_info->nr_hw_queues;
818*94c4a1e1SFrank Piva 
819*94c4a1e1SFrank Piva 	ublk_dbg(UBLK_DBG_QCOW2_META, "%s: %p off %llx flags %x\n", __func__,
820*94c4a1e1SFrank Piva 			this, get_offset(), flags);
821*94c4a1e1SFrank Piva 
822*94c4a1e1SFrank Piva 	header.qs.remove_slice_from_evicted_list(this);
823*94c4a1e1SFrank Piva 
824*94c4a1e1SFrank Piva 	ublk_dbg(UBLK_DBG_QCOW2_META, "%s: %p off %llx\n", __func__, this, get_offset());
825*94c4a1e1SFrank Piva 
826*94c4a1e1SFrank Piva 	//Tell the whole world, I am leaving
827*94c4a1e1SFrank Piva 	for (int i = 0; i < queues; i++) {
828*94c4a1e1SFrank Piva 		const struct ublksrv_queue *q = ublksrv_get_queue(header.qs.dev, i);
829*94c4a1e1SFrank Piva 
830*94c4a1e1SFrank Piva 		wakeup_all(q, -1);
831*94c4a1e1SFrank Piva 	}
832*94c4a1e1SFrank Piva 	header.qs.reclaim_slice(this);
833*94c4a1e1SFrank Piva }
834*94c4a1e1SFrank Piva 
Qcow2RefcountBlock(Qcow2State & qs,u64 off,u32 p_idx,u32 f)835*94c4a1e1SFrank Piva Qcow2RefcountBlock::Qcow2RefcountBlock(Qcow2State &qs, u64 off, u32 p_idx, u32 f):
836*94c4a1e1SFrank Piva 	Qcow2SliceMeta(qs, off, QCOW2_PARA::REFCOUNT_BLK_SLICE_BYTES,
837*94c4a1e1SFrank Piva 			typeid(*this).name(), p_idx, f),
838*94c4a1e1SFrank Piva 	dirty_start_idx((unsigned)-1)
839*94c4a1e1SFrank Piva {
840*94c4a1e1SFrank Piva 	entry_bits_order = qs.header.refcount_order;
841*94c4a1e1SFrank Piva 	ublk_dbg(UBLK_DBG_QCOW2_META_RB, "rb meta %p %llx -> %llx \n", this, virt_offset(), off);
842*94c4a1e1SFrank Piva }
843*94c4a1e1SFrank Piva 
844*94c4a1e1SFrank Piva 
reset(Qcow2State & qs,u64 off,u32 p_idx,u32 f)845*94c4a1e1SFrank Piva void Qcow2RefcountBlock::reset(Qcow2State &qs, u64 off, u32 p_idx, u32 f)
846*94c4a1e1SFrank Piva {
847*94c4a1e1SFrank Piva 	Qcow2RefcountBlock tmp(qs, off, p_idx, f | QCOW2_META_DONT_ALLOC_BUF);
848*94c4a1e1SFrank Piva 
849*94c4a1e1SFrank Piva 	qcow2_assert(refcnt == 0);
850*94c4a1e1SFrank Piva 
851*94c4a1e1SFrank Piva 	offset = tmp.get_offset();
852*94c4a1e1SFrank Piva 	flags  = tmp.get_flags() & ~QCOW2_META_DONT_ALLOC_BUF;
853*94c4a1e1SFrank Piva 	refcnt = tmp.read_ref();
854*94c4a1e1SFrank Piva 
855*94c4a1e1SFrank Piva 	ublk_dbg(UBLK_DBG_QCOW2_META_RB, "%s: %p refcnt %d flags %x offset %lx \n",
856*94c4a1e1SFrank Piva 			__func__, this, refcnt, flags, offset);
857*94c4a1e1SFrank Piva 
858*94c4a1e1SFrank Piva 	next_free_idx = tmp.get_next_free_idx();
859*94c4a1e1SFrank Piva 
860*94c4a1e1SFrank Piva 	parent_idx = tmp.parent_idx;
861*94c4a1e1SFrank Piva 
862*94c4a1e1SFrank Piva 	dirty_start_idx = tmp.dirty_start_idx;
863*94c4a1e1SFrank Piva }
864*94c4a1e1SFrank Piva 
get_entry(u32 idx)865*94c4a1e1SFrank Piva u64  Qcow2RefcountBlock::get_entry(u32 idx) {
866*94c4a1e1SFrank Piva 	return get_entry_fast(idx);
867*94c4a1e1SFrank Piva }
868*94c4a1e1SFrank Piva 
set_entry(u32 idx,u64 val)869*94c4a1e1SFrank Piva void Qcow2RefcountBlock::set_entry(u32 idx, u64 val) {
870*94c4a1e1SFrank Piva 	set_entry_fast(idx, val);
871*94c4a1e1SFrank Piva 
872*94c4a1e1SFrank Piva 	if (is_flushing() || !get_update()) {
873*94c4a1e1SFrank Piva 		qcow2_log("BUG %s: obj %p flags %x off %lx\n",
874*94c4a1e1SFrank Piva 				__func__, this, flags, offset);
875*94c4a1e1SFrank Piva 		qcow2_assert(0);
876*94c4a1e1SFrank Piva 	}
877*94c4a1e1SFrank Piva }
878*94c4a1e1SFrank Piva 
flush(Qcow2State & qs,const qcow2_io_ctx_t & ioc,u64 off,u32 len)879*94c4a1e1SFrank Piva int Qcow2RefcountBlock::flush(Qcow2State &qs, const qcow2_io_ctx_t &ioc,
880*94c4a1e1SFrank Piva 		u64 off, u32 len)
881*94c4a1e1SFrank Piva {
882*94c4a1e1SFrank Piva 	int ret;
883*94c4a1e1SFrank Piva 
884*94c4a1e1SFrank Piva 	//wait_clusters(qs, ioc);
885*94c4a1e1SFrank Piva 
886*94c4a1e1SFrank Piva 	if (!prep_flush(ioc))
887*94c4a1e1SFrank Piva 		return 0;
888*94c4a1e1SFrank Piva 
889*94c4a1e1SFrank Piva 	//flush can't be started unless the above two are done
890*94c4a1e1SFrank Piva 	//
891*94c4a1e1SFrank Piva 	//the ref is released in io_done()
892*94c4a1e1SFrank Piva 	get_ref();
893*94c4a1e1SFrank Piva #ifdef DEBUG_QCOW2_META_VALIDATE
894*94c4a1e1SFrank Piva 	memcpy(validate_addr, addr, buf_sz);
895*94c4a1e1SFrank Piva #endif
896*94c4a1e1SFrank Piva 	ret = Qcow2MappingMeta::__flush(qs, ioc, off, len);
897*94c4a1e1SFrank Piva 	if (ret <= 0) {
898*94c4a1e1SFrank Piva 		unprep_flush();
899*94c4a1e1SFrank Piva 		put_ref();
900*94c4a1e1SFrank Piva 	}
901*94c4a1e1SFrank Piva 	return ret;
902*94c4a1e1SFrank Piva }
903*94c4a1e1SFrank Piva 
~Qcow2RefcountBlock()904*94c4a1e1SFrank Piva Qcow2RefcountBlock::~Qcow2RefcountBlock()
905*94c4a1e1SFrank Piva {
906*94c4a1e1SFrank Piva }
907*94c4a1e1SFrank Piva 
get_dirty_range(u64 * start,u64 * end)908*94c4a1e1SFrank Piva void Qcow2RefcountBlock::get_dirty_range(u64 *start, u64 *end)
909*94c4a1e1SFrank Piva {
910*94c4a1e1SFrank Piva 	*start = 1;
911*94c4a1e1SFrank Piva 	*end = 0;
912*94c4a1e1SFrank Piva }
913*94c4a1e1SFrank Piva 
dump()914*94c4a1e1SFrank Piva void Qcow2RefcountBlock::dump()
915*94c4a1e1SFrank Piva {
916*94c4a1e1SFrank Piva 	unsigned cnt = 0;
917*94c4a1e1SFrank Piva 	int f = -1, l;
918*94c4a1e1SFrank Piva 	for (int i = 0; i < get_nr_entries(); i++) {
919*94c4a1e1SFrank Piva 		u64 entry = get_entry(i);
920*94c4a1e1SFrank Piva 
921*94c4a1e1SFrank Piva 		if (entry != 0) {
922*94c4a1e1SFrank Piva 			if (f == -1)
923*94c4a1e1SFrank Piva 				f = i;
924*94c4a1e1SFrank Piva 			l = i;
925*94c4a1e1SFrank Piva 			cnt++; //qcow2_log("%d: %lx\n", i, entry);
926*94c4a1e1SFrank Piva 		}
927*94c4a1e1SFrank Piva 	}
928*94c4a1e1SFrank Piva 
929*94c4a1e1SFrank Piva 	if (!cnt)
930*94c4a1e1SFrank Piva 		return;
931*94c4a1e1SFrank Piva 
932*94c4a1e1SFrank Piva 	qcow2_log("%s %s: buf_sz %u offset %" PRIx64 " sizeof %zd entries %u parent_idx %u virt_off %" PRIx64 " flags %x\n",
933*94c4a1e1SFrank Piva 			__func__, typeid(*this).name(), buf_sz, offset, sizeof(*this),
934*94c4a1e1SFrank Piva 			cnt, parent_idx, virt_offset(),
935*94c4a1e1SFrank Piva 			flags);
936*94c4a1e1SFrank Piva 	qcow2_log("\t [%d] = %" PRIx64 "/%" PRIx64 " [%d] = %" PRIx64 "/%" PRIx64 "\n",
937*94c4a1e1SFrank Piva 			f, get_entry(f),
938*94c4a1e1SFrank Piva 			virt_offset() + (f << header.cluster_bits),
939*94c4a1e1SFrank Piva 			l, get_entry(l),
940*94c4a1e1SFrank Piva 			virt_offset() + (l << header.cluster_bits));
941*94c4a1e1SFrank Piva }
942*94c4a1e1SFrank Piva 
Qcow2L2Table(Qcow2State & qs,u64 off,u32 p_idx,u32 f)943*94c4a1e1SFrank Piva Qcow2L2Table::Qcow2L2Table(Qcow2State &qs, u64 off, u32 p_idx, u32 f):
944*94c4a1e1SFrank Piva 	Qcow2SliceMeta(qs, off, QCOW2_PARA::L2_TABLE_SLICE_BYTES,
945*94c4a1e1SFrank Piva 		typeid(*this).name(), p_idx, f | QCOW2_META_MAPPING)
946*94c4a1e1SFrank Piva {
947*94c4a1e1SFrank Piva 	if (header.is_extended_l2_entries())
948*94c4a1e1SFrank Piva 		entry_bits_order <<= 1;
949*94c4a1e1SFrank Piva 	dirty_start = (u64)-1;
950*94c4a1e1SFrank Piva 	dirty_end = 0;
951*94c4a1e1SFrank Piva         ublk_dbg(UBLK_DBG_QCOW2_META_L2, "l2 meta %p %llx -> %llx \n", this, virt_offset(), off);
952*94c4a1e1SFrank Piva }
953*94c4a1e1SFrank Piva 
reset(Qcow2State & qs,u64 off,u32 p_idx,u32 f)954*94c4a1e1SFrank Piva void Qcow2L2Table::reset(Qcow2State &qs, u64 off, u32 p_idx, u32 f)
955*94c4a1e1SFrank Piva {
956*94c4a1e1SFrank Piva 	Qcow2L2Table tmp(qs, off, p_idx, f | QCOW2_META_DONT_ALLOC_BUF);
957*94c4a1e1SFrank Piva 
958*94c4a1e1SFrank Piva 	qcow2_assert(refcnt == 0);
959*94c4a1e1SFrank Piva 
960*94c4a1e1SFrank Piva 	offset = tmp.get_offset();
961*94c4a1e1SFrank Piva 	flags = tmp.get_flags() & ~QCOW2_META_DONT_ALLOC_BUF;
962*94c4a1e1SFrank Piva 	refcnt = tmp.read_ref();
963*94c4a1e1SFrank Piva 
964*94c4a1e1SFrank Piva 	ublk_dbg(UBLK_DBG_QCOW2_META_L2, "%s: %p refcnt %d flags %x offset %lx \n",
965*94c4a1e1SFrank Piva 			__func__, this, refcnt, flags, offset);
966*94c4a1e1SFrank Piva 
967*94c4a1e1SFrank Piva 	next_free_idx = tmp.get_next_free_idx();
968*94c4a1e1SFrank Piva 
969*94c4a1e1SFrank Piva 	parent_idx = tmp.parent_idx;
970*94c4a1e1SFrank Piva 
971*94c4a1e1SFrank Piva 	tmp.get_dirty_range(&dirty_start, &dirty_end);
972*94c4a1e1SFrank Piva }
973*94c4a1e1SFrank Piva 
~Qcow2L2Table()974*94c4a1e1SFrank Piva Qcow2L2Table::~Qcow2L2Table()
975*94c4a1e1SFrank Piva {
976*94c4a1e1SFrank Piva }
977*94c4a1e1SFrank Piva 
io_done(Qcow2State & qs,const struct ublksrv_queue * q,const struct io_uring_cqe * cqe)978*94c4a1e1SFrank Piva void Qcow2L2Table::io_done(Qcow2State &qs, const struct ublksrv_queue *q,
979*94c4a1e1SFrank Piva 			const struct io_uring_cqe *cqe)
980*94c4a1e1SFrank Piva {
981*94c4a1e1SFrank Piva 	get_ref();
982*94c4a1e1SFrank Piva 	Qcow2SliceMeta::io_done(qs, q, cqe);
983*94c4a1e1SFrank Piva 	check(qs, __func__, __LINE__);
984*94c4a1e1SFrank Piva 	put_ref();
985*94c4a1e1SFrank Piva }
986*94c4a1e1SFrank Piva 
get_entry(u32 idx)987*94c4a1e1SFrank Piva u64  Qcow2L2Table::get_entry(u32 idx) {
988*94c4a1e1SFrank Piva 	return get_entry_fast(idx);
989*94c4a1e1SFrank Piva }
990*94c4a1e1SFrank Piva 
get_dirty_range(u64 * start,u64 * end)991*94c4a1e1SFrank Piva void Qcow2L2Table::get_dirty_range(u64 *start, u64 *end)
992*94c4a1e1SFrank Piva {
993*94c4a1e1SFrank Piva 	*start = dirty_start;
994*94c4a1e1SFrank Piva 	*end = dirty_end;
995*94c4a1e1SFrank Piva }
996*94c4a1e1SFrank Piva 
set_entry(u32 idx,u64 val)997*94c4a1e1SFrank Piva void Qcow2L2Table::set_entry(u32 idx, u64 val) {
998*94c4a1e1SFrank Piva 	set_entry_fast(idx, val);
999*94c4a1e1SFrank Piva 
1000*94c4a1e1SFrank Piva 	if (is_flushing() || !get_update()) {
1001*94c4a1e1SFrank Piva 		qcow2_log("BUG %s: obj %p flags %x off %lx\n",
1002*94c4a1e1SFrank Piva 				__func__, this, flags, offset);
1003*94c4a1e1SFrank Piva 		qcow2_assert(0);
1004*94c4a1e1SFrank Piva 	}
1005*94c4a1e1SFrank Piva 
1006*94c4a1e1SFrank Piva 	val &= L2E_OFFSET_MASK;
1007*94c4a1e1SFrank Piva 
1008*94c4a1e1SFrank Piva 	qcow2_assert(!(val & ((1ULL << header.cluster_bits) - 1)));
1009*94c4a1e1SFrank Piva 
1010*94c4a1e1SFrank Piva 	if (val < dirty_start)
1011*94c4a1e1SFrank Piva 		dirty_start = val;
1012*94c4a1e1SFrank Piva 	if (val > dirty_end)
1013*94c4a1e1SFrank Piva 		dirty_end = val;
1014*94c4a1e1SFrank Piva }
1015*94c4a1e1SFrank Piva 
flush(Qcow2State & qs,const qcow2_io_ctx_t & ioc,u64 off,u32 len)1016*94c4a1e1SFrank Piva int Qcow2L2Table::flush(Qcow2State &qs, const qcow2_io_ctx_t &ioc,
1017*94c4a1e1SFrank Piva 		u64 off, u32 len)
1018*94c4a1e1SFrank Piva {
1019*94c4a1e1SFrank Piva 	int ret;
1020*94c4a1e1SFrank Piva 
1021*94c4a1e1SFrank Piva 	wait_clusters(qs, ioc);
1022*94c4a1e1SFrank Piva 
1023*94c4a1e1SFrank Piva 	if (!prep_flush(ioc))
1024*94c4a1e1SFrank Piva 		return 0;
1025*94c4a1e1SFrank Piva 
1026*94c4a1e1SFrank Piva 	//flush can't be started unless the above two are done
1027*94c4a1e1SFrank Piva 	//
1028*94c4a1e1SFrank Piva 	//the ref is released in io_done()
1029*94c4a1e1SFrank Piva 	get_ref();
1030*94c4a1e1SFrank Piva #ifdef DEBUG_QCOW2_META_VALIDATE
1031*94c4a1e1SFrank Piva 	memcpy(validate_addr, addr, buf_sz);
1032*94c4a1e1SFrank Piva 	check_duplicated_clusters(qs, ioc.get_tag(), __func__, __LINE__);
1033*94c4a1e1SFrank Piva #endif
1034*94c4a1e1SFrank Piva 	ret = Qcow2MappingMeta::__flush(qs, ioc, off, len);
1035*94c4a1e1SFrank Piva 	if (ret <= 0) {
1036*94c4a1e1SFrank Piva 		unprep_flush();
1037*94c4a1e1SFrank Piva 		put_ref();
1038*94c4a1e1SFrank Piva 	}
1039*94c4a1e1SFrank Piva 	return ret;
1040*94c4a1e1SFrank Piva }
1041*94c4a1e1SFrank Piva 
dump()1042*94c4a1e1SFrank Piva void Qcow2L2Table::dump()
1043*94c4a1e1SFrank Piva {
1044*94c4a1e1SFrank Piva 	unsigned cnt = 0;
1045*94c4a1e1SFrank Piva 	int f = -1, l;
1046*94c4a1e1SFrank Piva 
1047*94c4a1e1SFrank Piva 	for (int i = 0; i < get_nr_entries(); i++) {
1048*94c4a1e1SFrank Piva 		u64 entry = get_entry(i);
1049*94c4a1e1SFrank Piva 
1050*94c4a1e1SFrank Piva 		if (entry != 0) {
1051*94c4a1e1SFrank Piva 			if (f == -1)
1052*94c4a1e1SFrank Piva 				f = i;
1053*94c4a1e1SFrank Piva 			l = i;
1054*94c4a1e1SFrank Piva 			cnt++; //qcow2_log("%d: %lx\n", i, entry);
1055*94c4a1e1SFrank Piva 		}
1056*94c4a1e1SFrank Piva 	}
1057*94c4a1e1SFrank Piva 
1058*94c4a1e1SFrank Piva 	if (!cnt)
1059*94c4a1e1SFrank Piva 		return;
1060*94c4a1e1SFrank Piva 
1061*94c4a1e1SFrank Piva 	qcow2_log("%s %s: buf_sz %u offset %" PRIx64 " sizeof %zd entries %u parent_idx %u virt_off %" PRIx64 " flags %x\n",
1062*94c4a1e1SFrank Piva 			__func__, typeid(*this).name(), buf_sz, offset, sizeof(*this),
1063*94c4a1e1SFrank Piva 			cnt, parent_idx, virt_offset(), flags);
1064*94c4a1e1SFrank Piva 	qcow2_log("\t [%d] = %" PRIx64 "[%u] = %" PRIx64 "\n", f,
1065*94c4a1e1SFrank Piva 			get_entry(f), l, get_entry(l));
1066*94c4a1e1SFrank Piva }
1067*94c4a1e1SFrank Piva 
1068*94c4a1e1SFrank Piva #ifdef DEBUG_QCOW2_META_VALIDATE
check(Qcow2State & qs,const char * func,int line)1069*94c4a1e1SFrank Piva void Qcow2L2Table::check(Qcow2State &qs, const char *func, int line)
1070*94c4a1e1SFrank Piva {
1071*94c4a1e1SFrank Piva 	int i, cnt = 0;
1072*94c4a1e1SFrank Piva 	bool bad = false;
1073*94c4a1e1SFrank Piva 
1074*94c4a1e1SFrank Piva 	if (!get_update())
1075*94c4a1e1SFrank Piva 		return;
1076*94c4a1e1SFrank Piva 
1077*94c4a1e1SFrank Piva 	//don't check evicted obj, which can't be used by anyone
1078*94c4a1e1SFrank Piva 	if (get_evicted())
1079*94c4a1e1SFrank Piva 		return;
1080*94c4a1e1SFrank Piva 
1081*94c4a1e1SFrank Piva 	for (i = 0; i < get_nr_entries(); i++) {
1082*94c4a1e1SFrank Piva 		u64 entry = get_entry(i) & ((1ULL << 63) - 1);
1083*94c4a1e1SFrank Piva 
1084*94c4a1e1SFrank Piva 		if (entry == 0)
1085*94c4a1e1SFrank Piva 			continue;
1086*94c4a1e1SFrank Piva 
1087*94c4a1e1SFrank Piva 		cnt++;
1088*94c4a1e1SFrank Piva 
1089*94c4a1e1SFrank Piva 		if (entry + (1ULL << qs.header.cluster_bits) >
1090*94c4a1e1SFrank Piva 				qs.cluster_allocator.max_physical_size) {
1091*94c4a1e1SFrank Piva 			qcow2_log("%s %d: entry %llx(parent idx %d, idx %d) offset %llx is too big\n",
1092*94c4a1e1SFrank Piva 					func, line, entry, parent_idx, i,
1093*94c4a1e1SFrank Piva 					get_offset());
1094*94c4a1e1SFrank Piva 			bad = true;
1095*94c4a1e1SFrank Piva 		}
1096*94c4a1e1SFrank Piva 
1097*94c4a1e1SFrank Piva 		if (entry & ((1ULL << qs.header.cluster_bits) - 1)) {
1098*94c4a1e1SFrank Piva 			qcow2_log("%s: entry %llx(parent idx %d, idx %d) offset %llx isn't aligned\n",
1099*94c4a1e1SFrank Piva 					func, line, entry, parent_idx, i,
1100*94c4a1e1SFrank Piva 					get_offset());
1101*94c4a1e1SFrank Piva 			bad = true;
1102*94c4a1e1SFrank Piva 		}
1103*94c4a1e1SFrank Piva 	}
1104*94c4a1e1SFrank Piva 
1105*94c4a1e1SFrank Piva 	if (bad) {
1106*94c4a1e1SFrank Piva 		qcow2_log("%s %s: %p buf_sz %u offset %llx sizeof %d parent_idx %u virt_off %llx flags %x refcnt %d\n",
1107*94c4a1e1SFrank Piva 				__func__, typeid(*this).name(), this, buf_sz, offset, sizeof(*this),
1108*94c4a1e1SFrank Piva 				parent_idx, virt_offset(), flags, read_ref());
1109*94c4a1e1SFrank Piva 		qcow2_log("\t total entries %d\n", cnt);
1110*94c4a1e1SFrank Piva 		assert(0);
1111*94c4a1e1SFrank Piva 	}
1112*94c4a1e1SFrank Piva }
1113*94c4a1e1SFrank Piva 
check_duplicated_clusters(Qcow2State & qs,int tag,const char * func,int line)1114*94c4a1e1SFrank Piva void Qcow2L2Table::check_duplicated_clusters(Qcow2State &qs, int tag,
1115*94c4a1e1SFrank Piva 		const char *func, int line)
1116*94c4a1e1SFrank Piva {
1117*94c4a1e1SFrank Piva 	for (int i = 0; i < get_nr_entries(); i++) {
1118*94c4a1e1SFrank Piva 		u64 entry = get_entry(i);
1119*94c4a1e1SFrank Piva 
1120*94c4a1e1SFrank Piva 		if (entry != 0) {
1121*94c4a1e1SFrank Piva 			u64 host_off = entry & ((1ULL << 63) - 1);
1122*94c4a1e1SFrank Piva 			u64 virt_off = virt_offset() + (((u64)i) <<
1123*94c4a1e1SFrank Piva 				qs.header.cluster_bits);
1124*94c4a1e1SFrank Piva 
1125*94c4a1e1SFrank Piva 			if (qs.validate_cluster_map(host_off, virt_off))
1126*94c4a1e1SFrank Piva 				continue;
1127*94c4a1e1SFrank Piva 			qcow2_log("BUG %s %d: tag %d obj %p flags %x off %lx virt_off "
1128*94c4a1e1SFrank Piva 					"%lx(#%d) parent_idx %d\n",
1129*94c4a1e1SFrank Piva 				func, line, tag, this, flags, offset,
1130*94c4a1e1SFrank Piva 				virt_offset(), i, parent_idx);
1131*94c4a1e1SFrank Piva 			qcow2_assert(0);
1132*94c4a1e1SFrank Piva 		}
1133*94c4a1e1SFrank Piva 	}
1134*94c4a1e1SFrank Piva }
1135*94c4a1e1SFrank Piva #endif
1136