xref: /aosp_15_r20/external/ublksrv/qcow2/qcow2.h (revision 94c4a1e103eb1715230460aab379dff275992c20)
1*94c4a1e1SFrank Piva // SPDX-License-Identifier: GPL-2.0
2*94c4a1e1SFrank Piva #ifndef UBLK_QCOW2_H_
3*94c4a1e1SFrank Piva #define UBLK_QCOW2_H_
4*94c4a1e1SFrank Piva 
5*94c4a1e1SFrank Piva #include <string>
6*94c4a1e1SFrank Piva #include <iostream>
7*94c4a1e1SFrank Piva #include <valarray>
8*94c4a1e1SFrank Piva #include <unordered_set>
9*94c4a1e1SFrank Piva #include <unordered_map>
10*94c4a1e1SFrank Piva #include <bits/stdc++.h>
11*94c4a1e1SFrank Piva #include <exception>
12*94c4a1e1SFrank Piva #include <chrono>
13*94c4a1e1SFrank Piva #include <deque>
14*94c4a1e1SFrank Piva #include "lrucache.hpp"
15*94c4a1e1SFrank Piva #include "qcow2_format.h"
16*94c4a1e1SFrank Piva #include "qcow2_meta.h"
17*94c4a1e1SFrank Piva 
18*94c4a1e1SFrank Piva class Qcow2State;
19*94c4a1e1SFrank Piva class Qcow2Header;
20*94c4a1e1SFrank Piva 
21*94c4a1e1SFrank Piva /*
22*94c4a1e1SFrank Piva  * Design overview
23*94c4a1e1SFrank Piva  *
24*94c4a1e1SFrank Piva  * 1) code reuse:
 *    - the code is structured so that it can be reused as one libqcow2 library
26*94c4a1e1SFrank Piva  *
27*94c4a1e1SFrank Piva  *    - internal implementation maximize reusing design & code
28*94c4a1e1SFrank Piva  *
29*94c4a1e1SFrank Piva  * 2) io isolation: io handling code often depends on os or platform or
30*94c4a1e1SFrank Piva  * user choice, so io handling isolation is considered from the beginning;
31*94c4a1e1SFrank Piva  * but focus on aio style
32*94c4a1e1SFrank Piva  *
33*94c4a1e1SFrank Piva  * 3) completely aio: for read/write io and meta
34*94c4a1e1SFrank Piva  */
35*94c4a1e1SFrank Piva 
36*94c4a1e1SFrank Piva /* MQ support:
37*94c4a1e1SFrank Piva  *
 * 1) how to share meta data among queues? meta data has to be protected to
 * support MQ
40*94c4a1e1SFrank Piva  *
41*94c4a1e1SFrank Piva  * 2) we can start from SQ support.
42*94c4a1e1SFrank Piva  */
43*94c4a1e1SFrank Piva 
44*94c4a1e1SFrank Piva /*
45*94c4a1e1SFrank Piva  * Buffer management and cache design:
46*94c4a1e1SFrank Piva  *
47*94c4a1e1SFrank Piva  * 1) fixed amount of buffer is pre-allocated & shared for all l2 cache slice,
48*94c4a1e1SFrank Piva  * refcount blk, just like qcow2
49*94c4a1e1SFrank Piva  *
50*94c4a1e1SFrank Piva  * 2) fixed buffer is pre-allocated for header, l1, refcount table and other
51*94c4a1e1SFrank Piva  * kind of meta, but the buffer is dedicated
52*94c4a1e1SFrank Piva  *
53*94c4a1e1SFrank Piva  * Cache design(L2 table cache, refcount block cache):
54*94c4a1e1SFrank Piva  *
 * 1) why slice caching can't be supported for the l1/refcount table
56*94c4a1e1SFrank Piva  *
57*94c4a1e1SFrank Piva  */
58*94c4a1e1SFrank Piva 
// Thrown when a qcow2 meta-data I/O operation fails.
class MetaIoException: public std::exception
{
public:
	// Must be 'const noexcept' to actually override
	// std::exception::what(); the previous non-const signature only
	// hid the base version, so code catching std::exception& got the
	// implementation-defined default message instead of this one.
	const char *what() const noexcept override { return "MetaIO exception"; }
};
64*94c4a1e1SFrank Piva 
// Thrown when updating a qcow2 meta-data entry fails.
class MetaUpdateException: public std::exception
{
public:
	// Must be 'const noexcept' to actually override
	// std::exception::what(); the previous non-const signature only
	// hid the base version, so code catching std::exception& got the
	// implementation-defined default message instead of this one.
	const char *what() const noexcept override { return "MetaEntry update exception"; }
};
70*94c4a1e1SFrank Piva 
/*
 * LRU cache of meta-data slices; instantiated with Qcow2L2Table (see
 * Qcow2ClusterMapping) and Qcow2RefcountBlock (see
 * Qcow2ClusterAllocator).
 *
 * Slices pushed out of the LRU cache are parked in 'evicted_slices'
 * until they are done with, after which their buffers can be recycled
 * through 'reclaimed_slices'.
 */
template <class T>
class slice_cache {
private:
	// log2 of: one slice's buffer size, the cluster size, and the
	// virtual (guest address) range covered by one slice
	u8 slice_size_bits, cluster_size_bits, slice_virt_size_bits;

	// in-use slices, keyed by u64 offset
	cache::lru_cache<u64, T *> slices;
	// slices evicted from 'slices' but still reachable (may be dirty)
	std::unordered_map<u64, T *> evicted_slices;

	// slices whose buffer may be reused for newly allocated slices
	std::deque<T *> reclaimed_slices;

	int __figure_group_for_flush(Qcow2State &qs);
	int figure_group_from_dirty_list(Qcow2State &qs);
public:
	// park one slice so its buffer can be reused later
	void add_slice_to_reclaim_list(T *t) {
		reclaimed_slices.push_back(t);
	}

	// pop one reclaimable slice (FIFO order), or nullptr if none
	T *pick_slice_from_reclaim_list() {
		if (reclaimed_slices.empty())
			return nullptr;
		auto t = reclaimed_slices.front();
		reclaimed_slices.pop_front();

		return t;
	}

	// how many slices one cluster is split into
	unsigned get_nr_slices() {
		return 1U << (cluster_size_bits - slice_size_bits);
	}

	u64 get_slice_virt_size_bits() {
		return slice_virt_size_bits;
	}

	u64 get_slice_size_bits() {
		return slice_size_bits;
	}

	unsigned get_slices_size() {
		return slices.size();
	}

	unsigned get_evicted_slices_size() {
		return evicted_slices.size();
	}

	// index, within its covering cluster, of the slice that maps
	// 'virt_offset'
	unsigned get_slice_idx(u64 virt_offset) {
		u32 nr_slices = 1ULL << (cluster_size_bits - slice_size_bits);
		const u64 virt_size = ((u64)nr_slices) << slice_virt_size_bits;
		// start of the virtual range covered by the whole cluster
		u64 virt_base = virt_offset & ~(virt_size - 1);

		return (virt_offset - virt_base) >> slice_virt_size_bits;
	}

	// look up one slice in the LRU cache, and optionally in the
	// evicted map as well; returns nullptr when not found
	T *find_slice(u64 key, bool use_evicted_cache) {
		T *t = slices.get(key);

		if (t)
			return t;

		if (use_evicted_cache) {
			auto it = evicted_slices.find(key);

			if (it != evicted_slices.end())
				return it->second;
		}
		return nullptr;
	}

	// drop one slice from the evicted map, if present
	void remove_slice_from_evicted_list(T *t) {
		auto it = evicted_slices.find(t->virt_offset());

		if (it != evicted_slices.end())
			evicted_slices.erase(it);
	}

	//called in running flush context
	bool has_evicted_dirty_slices()
	{
		if (evicted_slices.empty())
			return false;

		for (auto it = evicted_slices.cbegin(); it !=
				evicted_slices.cend(); ++it) {
			if (it->second->get_dirty(-1))
				return true;
		}
		return false;
	}

	slice_cache(u8 slice_bits, u8 cluster_bits, u8 slice_virt_bits,
			u32 max_size);

	//only called from meta flushing code path
	T *__find_slice(u64 key, bool use_evicted_cache);
	T *alloc_slice(Qcow2State& qs, const qcow2_io_ctx_t &ioc,
		u64 virt_offset, u64 host_offset, u32 parent_idx);
	void add_slice_to_evicted_list(u64 virt_offset, T *l2);
	void dump(Qcow2State &qs);
	int figure_group_for_flush(Qcow2State &qs);
	bool has_dirty_slice(Qcow2State &qs);
	void shrink(Qcow2State &qs);
};
174*94c4a1e1SFrank Piva 
/* todo: remove caches in destructor */
/*
 * Guest-virtual to host-physical cluster mapping, backed by the qcow2
 * l1 table plus an LRU cache of l2 table slices.
 */
class Qcow2ClusterMapping {
private:
	Qcow2State &state;
	// cache of l2 table slices
	slice_cache <Qcow2L2Table> cache;

	friend class Qcow2State;

	u32 cluster_bits, l2_entries_order;

	//l1/l2 entry alloc state
	//
	//added before allocating one l1/l2 entry, and freed after
	//the allocation is done
	//
	//For l1, the key is (1ULL << 63) | offset & ~((1ULL << (cluster_bits + l2 entries bits)) - 1)
	//
	//for l2, the key is offset & ~((1ULL << cluster_bits) - 1)
	//
	//the mapped value is the 'owner' passed to entry_mark_allocating()
	std::unordered_map<u64, u32> entry_alloc;
	// high-water mark of entry_alloc.size(), for stats/debugging
	u32 max_alloc_entries;

	// virtual range covered by one l2 slice; '- 3' divides the slice
	// byte size by 8, i.e. one 8-byte l2 entry maps one cluster
	u64 l2_slice_virt_size() {
		return 1ULL << (cluster_bits + L2_TABLE_SLICE_BITS - 3);
	}

	// slice-cache key: virt offset rounded down to the slice range
	u64 l2_slice_key(u64 virt_offset) {
		return ((virt_offset) & ~(l2_slice_virt_size() - 1));
	}

	// returns the recorded owner for 'key', or (u32)-1 when no
	// allocation is in flight for it
	u32 __entry_get_alloc_state(u64 key) {
		auto it = entry_alloc.find(key);

		if (it != entry_alloc.end())
			return it->second;
		return -1;
	}

	bool __entry_is_allocating(u64 key) {
		u32 state = __entry_get_alloc_state(key);

		// (u32)-1 is the "not found" sentinel, see above
		return state != -1;
	}

	// record that 'owner' started allocating the entry at 'key';
	// the key must not already be in flight
	void __entry_mark_allocating(u64 key, u32 owner) {
		auto it = entry_alloc.find(key);
		u32 sz;

		qcow2_assert(it == entry_alloc.end());

		entry_alloc[key] = owner;

		sz = entry_alloc.size();
		if (sz > max_alloc_entries)
			max_alloc_entries = sz;
	}

	// the allocation for 'key' is done: drop the in-flight record
	void __entry_mark_allocated(u64 key) {
		auto it = entry_alloc.find(key);

		qcow2_assert(it != entry_alloc.end());

		entry_alloc.erase(it);
	}

	// l1 keys carry the top bit so they can never collide with l2
	// keys (which assert that bit clear below)
	u64 l1_entry_alloc_key(u64 offset) {
		return (offset & ~((1ULL << (cluster_bits +
					     l2_entries_order)) - 1)) |
				(1ULL << 63);
	}

	u64 l2_entry_alloc_key(u64 offset) {
		u64 key = (offset & ~((1ULL << cluster_bits) - 1));

		qcow2_assert(!(key & (1ULL << 63)));
		return key;
	}

	u64 entry_alloc_key(u64 offset, bool l1) {
		if (l1)
			return l1_entry_alloc_key(offset);
		return l2_entry_alloc_key(offset);
	}

	bool entry_is_allocating(u64 offset, bool l1) {
		u64 key = entry_alloc_key(offset, l1);

		return __entry_is_allocating(key);
	}

	// returns the owner tag; asserts that an allocation is in flight
	u32 entry_get_alloc_owner(u64 offset, bool l1) {
		u64 key = entry_alloc_key(offset, l1);
		u32 state = __entry_get_alloc_state(key);

		qcow2_assert(state != -1);
		return state;
	}

	void entry_mark_allocating(u64 offset, u32 owner, bool l1) {
		u64 key = entry_alloc_key(offset, l1);

		__entry_mark_allocating(key, owner);
	}

	void entry_mark_allocated(u64 offset, bool l1) {
		u64 key = entry_alloc_key(offset, l1);

		__entry_mark_allocated(key);
	}

	Qcow2L2Table *create_and_add_l2(const qcow2_io_ctx_t &ioc, u64 offset);
	Qcow2L2Table *load_l2_slice(const qcow2_io_ctx_t &ioc, u64 offset,
			u64 l1_entry);
	int build_mapping(const qcow2_io_ctx_t &ioc,
		u64 virt_offset, Qcow2L2Table *l2, u32 idx_in_slice,
		u64 *l2_entry);
	u64 __map_cluster(const qcow2_io_ctx_t &ioc,
		Qcow2L2Table *l2, u64 offset, bool create_l2);
	Qcow2L2Table *create_l2_map(const qcow2_io_ctx_t &ioc, u64 offset,
			bool create_l2);
public:
	// refcount table shouldn't be so big
	Qcow2ClusterMapping(Qcow2State &qs);

	//the main logic for mapping cluster
	//create l2 and setup the mapping if 'create_l2' is true & l2 isn't
	//present for this 'offset'
	u64 map_cluster(const qcow2_io_ctx_t &ioc, u64 offset, bool create_l2);
	int figure_group_from_l1_table();

	Qcow2L2Table* __find_slice(u64 key, bool use_dirty=true);

	// l1-table index for guest offset 'offset'
	u64 l1_idx(u64 offset) {
		return offset >> (cluster_bits + l2_entries_order);
	}

	// l2-table index for guest offset 'offset'
	u64 l2_idx(u64 offset) {
		return (offset >> cluster_bits) &
			((1ULL << l2_entries_order) - 1);
	}

	bool has_evicted_dirty_slices()
	{
		return cache.has_evicted_dirty_slices();
	}

	void dump_meta();
};
322*94c4a1e1SFrank Piva 
//what a newly allocated cluster will hold (passed as the 'purpose'
//argument to Qcow2ClusterAllocator::alloc_cluster_started())
enum QCOW2_CLUSTER_USE {
	L2_TABLE = 0,		//cluster backs an l2 table
	REFCOUNT_BLK = 1,	//cluster backs a refcount block
	DATA = 2,		//cluster backs guest data
};
328*94c4a1e1SFrank Piva 
329*94c4a1e1SFrank Piva /*
330*94c4a1e1SFrank Piva  * Think about lifetime issue. Is it possible that one state is removed
331*94c4a1e1SFrank Piva  * but it is being used somewhere?
332*94c4a1e1SFrank Piva  *
333*94c4a1e1SFrank Piva  * So far the simple rule is that the state can only be removed after
334*94c4a1e1SFrank Piva  * its state becomes QCOW2_ALLOC_ZEROED.
335*94c4a1e1SFrank Piva  *
336*94c4a1e1SFrank Piva  * So except for being absolute safety, don't call get_cluster_state()
337*94c4a1e1SFrank Piva  * directly.
338*94c4a1e1SFrank Piva  */
339*94c4a1e1SFrank Piva class Qcow2ClusterState {
340*94c4a1e1SFrank Piva #define QCOW2_ALLOC_STARTED	0	//cluster allocated in ram
341*94c4a1e1SFrank Piva #define QCOW2_ALLOC_ZEROING	1	//IO for zeroing this cluster is submitted
342*94c4a1e1SFrank Piva #define QCOW2_ALLOC_ZEROED	2	//cluster zeroed
343*94c4a1e1SFrank Piva #define QCOW2_ALLOC_DONE	3	//mapping setup
344*94c4a1e1SFrank Piva private:
345*94c4a1e1SFrank Piva 	u8 state;
346*94c4a1e1SFrank Piva 	u8 purpose;
347*94c4a1e1SFrank Piva 	IOWaiters io_waiters;
348*94c4a1e1SFrank Piva 
349*94c4a1e1SFrank Piva public:
Qcow2ClusterState()350*94c4a1e1SFrank Piva 	Qcow2ClusterState() {
351*94c4a1e1SFrank Piva 		state = QCOW2_ALLOC_STARTED;
352*94c4a1e1SFrank Piva 	}
353*94c4a1e1SFrank Piva 
Qcow2ClusterState(u8 s,u8 p)354*94c4a1e1SFrank Piva 	Qcow2ClusterState(u8 s, u8 p) {
355*94c4a1e1SFrank Piva 		state = s;
356*94c4a1e1SFrank Piva 		purpose = p;
357*94c4a1e1SFrank Piva 	}
358*94c4a1e1SFrank Piva 
359*94c4a1e1SFrank Piva 	//called after the cluster is allocated from ram
get_state()360*94c4a1e1SFrank Piva 	u8 get_state() {
361*94c4a1e1SFrank Piva 		return state;
362*94c4a1e1SFrank Piva 	}
363*94c4a1e1SFrank Piva 
set_state(u8 s)364*94c4a1e1SFrank Piva 	void set_state(u8 s) {
365*94c4a1e1SFrank Piva 		state = s;
366*94c4a1e1SFrank Piva 	}
367*94c4a1e1SFrank Piva 
get_purpose()368*94c4a1e1SFrank Piva 	u8 get_purpose() {
369*94c4a1e1SFrank Piva 		return purpose;
370*94c4a1e1SFrank Piva 	}
371*94c4a1e1SFrank Piva 
add_waiter(unsigned tag)372*94c4a1e1SFrank Piva 	void add_waiter(unsigned tag) {
373*94c4a1e1SFrank Piva 		io_waiters.add_waiter(tag);
374*94c4a1e1SFrank Piva 	}
375*94c4a1e1SFrank Piva 
wakeup_all(const struct ublksrv_queue * q,unsigned my_tag)376*94c4a1e1SFrank Piva 	void wakeup_all(const struct ublksrv_queue *q, unsigned my_tag) {
377*94c4a1e1SFrank Piva 		io_waiters.wakeup_all(q, my_tag);
378*94c4a1e1SFrank Piva 	}
379*94c4a1e1SFrank Piva };
380*94c4a1e1SFrank Piva 
/* todo: remove caches in destructor */
/*
 * Cluster allocator: hands out clusters from the image file and
 * maintains refcount meta-data (refcount table plus an LRU cache of
 * refcount block slices), while tracking in-flight allocations in
 * 'alloc_state' (see the lifetime comment above Qcow2ClusterState).
 */
class Qcow2ClusterAllocator {
private:
	Qcow2State &state;
	s32 slice_idx;
	u8  table_entry_virt_size_bits;
	// number of clusters allocated so far
	u64 alloc_cnt;
	// cache of refcount block slices
	slice_cache <Qcow2RefcountBlock> cache;

	u32 refcount_block_entries();
	void allocate_refcount_blk(const qcow2_io_ctx_t &ioc, s32 idx);

	friend class Qcow2State;

public:
	//key is cluster start offset, val is its allocate status
	std::unordered_map<u64, Qcow2ClusterState *> alloc_state;
	// high-water mark of alloc_state.size(), for stats/debugging
	u32 max_alloc_states;
	u64 max_physical_size;

	// refcount table shouldn't be so big
	Qcow2ClusterAllocator(Qcow2State &qs);

	//called after refcount table is loaded
	void setup();
	u64 allocate_cluster(const qcow2_io_ctx_t &ioc);
	u64 refcount_blk_key(const Qcow2RefcountBlock *rb);
	void dump_meta();
	int figure_group_from_refcount_table();

	Qcow2RefcountBlock* __find_slice(u64 key);

	bool has_evicted_dirty_slices()
	{
		return cache.has_evicted_dirty_slices();
	}

	/* the following helpers are for implementing soft update */

	//don't refer to one state after one cycle of coroutine wait &
	//wakeup, and caller has to check if the return value is nullptr
	Qcow2ClusterState *get_cluster_state(u64 cluster_offset) {
		auto it = alloc_state.find(cluster_offset);

		if (it == alloc_state.end())
			return nullptr;

		return it->second;
	}

	//the zeroing io may return -EAGAIN, then we need to
	//reset its state for re-issuing zeroing IO; returns true when
	//the state was reset back to QCOW2_ALLOC_STARTED
	bool alloc_cluster_reset(u64 cluster_offset) {
		auto it = alloc_state.find(cluster_offset);

		if (it == alloc_state.end())
			return false;

		//maybe the cluster has been zeroed, so double check
		if (it->second->get_state() < QCOW2_ALLOC_ZEROED) {
			it->second->set_state(QCOW2_ALLOC_STARTED);
			return true;
		}
		return false;
	}

	//called after the cluster is allocated from ram
	void alloc_cluster_started(const qcow2_io_ctx_t &ioc,
			u64 cluster_offset, u8 purpose);

	//check if the allocated cluster is zeroed; no tracked state
	//counts as zeroed (states are removed once past ZEROED, see the
	//lifetime comment above Qcow2ClusterState)
	bool alloc_cluster_is_zeroed(u64 cluster_offset) {
		Qcow2ClusterState * cs = get_cluster_state(cluster_offset);

		return cs == nullptr || cs->get_state() >= QCOW2_ALLOC_ZEROED;
	}

	//called after IO for zeroing this cluster is started
	void alloc_cluster_zeroing(const qcow2_io_ctx_t &ioc, u64 cluster_offset);

	//called after the cluster is zeroed
	void alloc_cluster_zeroed(const struct ublksrv_queue *q,
			int tag, u64 cluster_offset);

	//called after the cluster is zeroed and associated mapping is updated
	void alloc_cluster_done(const qcow2_io_ctx_t &ioc, u64 cluster_offset);

	//called after the cluster is zeroed and associated mapping is updated
	void alloc_cluster_add_waiter(const qcow2_io_ctx_t &ioc,
			u64 cluster_offset);
};
472*94c4a1e1SFrank Piva 
/*
 * Holds the opened qcow2 image file; ctor/dtor are defined out of
 * line (presumably open/close the fd — confirm in the .cpp).
 *
 * NOTE(review): the class is copyable while exposing a raw fd, which
 * permits double-close; consider deleting copy operations.
 */
class Qcow2Image {
private:
	std::string	fpath;	//path the image was opened from
public:
	int fd;			//file descriptor of the opened image
	Qcow2Image(const char *path);
	~Qcow2Image();
};
481*94c4a1e1SFrank Piva 
//per-top-table flushing states, presumably stored in
//MetaFlushingState::state (see set_state/run_flush) — confirm
enum qcow2_meta_flush {
	IDLE,
	PREP_WRITE_SLICES, //all slices are added to list for flush
	ZERO_MY_CLUSTER,
	WAIT,	//valid only for mapping table, wait for refcount table flushing done
	WRITE_SLICES,
	WRITE_TOP,
	DONE,
};
491*94c4a1e1SFrank Piva 
/*
 * Drives flushing of dirty meta slices for one top table (the mapping
 * l1 table when 'mapping' is true, otherwise the refcount table) as a
 * state machine, see enum qcow2_meta_flush.
 */
class MetaFlushingState {
private:
	// for flushing slices depended by current parent_idx, and for
	// handling state of WRITE_SLICE
	//
	//any slices depended by current parent_idx are added to this list,
	//and it is removed after the flushing is done
	//
	//once the list becomes empty, the state is switched to
	//WRITE_TOP.
	std::vector <Qcow2SliceMeta *> slices_to_flush;
	std::vector <Qcow2SliceMeta *> slices_in_flight;
	// one of enum qcow2_meta_flush, changed via set_state()
	unsigned state;
	int parent_blk_idx;
	int parent_entry_idx;
	// true for the mapping (l1/l2) instance, false for refcount
	bool mapping;

	void del_meta_from_list(std::vector <Qcow2SliceMeta *> &v,
		const Qcow2SliceMeta *t);

	void __prep_write_slice(Qcow2State &qs, const struct ublksrv_queue *q);

	void __zero_my_cluster(Qcow2State &qs, const struct ublksrv_queue *q);
	co_io_job __zero_my_cluster_co(Qcow2State &qs,
		const struct ublksrv_queue *q, struct ublk_io_tgt *io, int tag,
		Qcow2SliceMeta *m);

	void __write_slices(Qcow2State &qs, const struct ublksrv_queue *q);
	co_io_job __write_slice_co(Qcow2State &qs,
		const struct ublksrv_queue *q, Qcow2SliceMeta *m,
		struct ublk_io_tgt *io, int tag);

	void __write_top(Qcow2State &qs, const struct ublksrv_queue *q);
	co_io_job  __write_top_co(Qcow2State &qs, const struct ublksrv_queue *q,
			struct ublk_io_tgt *io, int tag);

	void __done(Qcow2State &qs, const struct ublksrv_queue *q);
	bool __need_flush(int queued);
	void mark_no_update();
public:
	Qcow2TopTable &top;
	// count of dirtied, not-yet-flushed slices; maintained via
	// Qcow2MetaFlushing::{inc,dec}_dirtied_slice()
	unsigned slice_dirtied;
	std::chrono::system_clock::time_point last_flush;

	unsigned get_state() const {
		return state;
	}

	// switch state, logging the transition for debugging
	void set_state(u32 s) {
		//NOTE(review): vector::size() returns size_t, so '%zu'
		//would be the exact specifier rather than '%zd'
		ublk_dbg(UBLK_DBG_QCOW2_FLUSH, "%s: map %d slice_dirtied %u parent_blk_idx %d"
				" parent_entry_idx %d %d->%d to_flush %zd in_flight %zd\n",
				__func__, mapping, slice_dirtied,
				parent_blk_idx, parent_entry_idx, state,
				s, slices_to_flush.size(),
				slices_in_flight.size());
		state = s;
	}

	MetaFlushingState(Qcow2TopTable &t, bool is_mapping);
	void slice_is_done(const Qcow2SliceMeta*);
	void add_slice_to_flush(Qcow2SliceMeta *m);
	void run_flush(Qcow2State &qs, const struct ublksrv_queue *q,
			int top_blk_idx);
	bool need_flush(Qcow2State &qs, int *top_idx, unsigned queued);
	void dump(const char *func, int line) const;
	int calc_refcount_dirty_blk_range(Qcow2State& qs,
			int *refcnt_blk_start, int *refcnt_blk_end);
};
559*94c4a1e1SFrank Piva 
560*94c4a1e1SFrank Piva /*
561*94c4a1e1SFrank Piva  * For any kind of meta flushing, one tag or io slot is required,
562*94c4a1e1SFrank Piva  * so start the meta flushing class with meta tag allocator.
563*94c4a1e1SFrank Piva  *
564*94c4a1e1SFrank Piva  * Meta data updating is never forground task, so if running out
565*94c4a1e1SFrank Piva  * of tags, let's wait until one tag is released.
566*94c4a1e1SFrank Piva  */
567*94c4a1e1SFrank Piva class Qcow2MetaFlushing {
568*94c4a1e1SFrank Piva private:
569*94c4a1e1SFrank Piva 	std::vector <bool> tags;
570*94c4a1e1SFrank Piva 
571*94c4a1e1SFrank Piva 	int refcnt_blk_start;
572*94c4a1e1SFrank Piva 	int refcnt_blk_end;
573*94c4a1e1SFrank Piva 
574*94c4a1e1SFrank Piva 	bool handle_mapping_dependency_start_end(Qcow2State *qs,
575*94c4a1e1SFrank Piva 			const struct ublksrv_queue *q);
576*94c4a1e1SFrank Piva 	void handle_mapping_dependency(Qcow2State *qs,
577*94c4a1e1SFrank Piva 			const struct ublksrv_queue *q);
578*94c4a1e1SFrank Piva public:
579*94c4a1e1SFrank Piva 	Qcow2State &state;
580*94c4a1e1SFrank Piva 
581*94c4a1e1SFrank Piva 	MetaFlushingState mapping_stat;
582*94c4a1e1SFrank Piva 	MetaFlushingState refcount_stat;
583*94c4a1e1SFrank Piva 
inc_dirtied_slice(bool mapping)584*94c4a1e1SFrank Piva 	void inc_dirtied_slice(bool mapping) {
585*94c4a1e1SFrank Piva 		if (mapping)
586*94c4a1e1SFrank Piva 			mapping_stat.slice_dirtied += 1;
587*94c4a1e1SFrank Piva 		else
588*94c4a1e1SFrank Piva 			refcount_stat.slice_dirtied += 1;
589*94c4a1e1SFrank Piva 	}
590*94c4a1e1SFrank Piva 
dec_dirtied_slice(bool mapping)591*94c4a1e1SFrank Piva 	void dec_dirtied_slice(bool mapping) {
592*94c4a1e1SFrank Piva 		if (mapping)
593*94c4a1e1SFrank Piva 			mapping_stat.slice_dirtied -= 1;
594*94c4a1e1SFrank Piva 		else
595*94c4a1e1SFrank Piva 			refcount_stat.slice_dirtied -= 1;
596*94c4a1e1SFrank Piva 	}
597*94c4a1e1SFrank Piva 
598*94c4a1e1SFrank Piva 	Qcow2MetaFlushing(Qcow2State &qs);
599*94c4a1e1SFrank Piva 	void dump();
600*94c4a1e1SFrank Piva 	int alloc_tag(const struct ublksrv_queue *q);
601*94c4a1e1SFrank Piva 	void free_tag(const struct ublksrv_queue *q, int tag);
602*94c4a1e1SFrank Piva 	void run_flush(const struct ublksrv_queue *q, int queued);
603*94c4a1e1SFrank Piva 	bool is_flushing();
604*94c4a1e1SFrank Piva };
605*94c4a1e1SFrank Piva 
class Qcow2State {
private:
	// Slices handed back via add_slice_to_free_list(); drained by
	// reclaim_slice()/kill_slices() (implemented out of view)
	std::vector <Qcow2SliceMeta *> freed_slices;
public:
	const struct ublksrv_ctrl_dev_info *dev_info;
	// log2 of the minimum flush unit, exposed by get_min_flush_unit_bits()
	unsigned min_bs_bits;
	// Per-queue table of in-flight meta IO objects: 'meta' is indexed by
	// the per-queue ID stored in sqe->user_data, 'nr' counts live entries
	struct meta_mapping {
		int nr;
		std::vector <Qcow2MappingMeta *> meta;
	};
	typedef std::valarray<struct meta_mapping> MetaArray;

	const struct ublksrv_dev *dev;
	Qcow2Image img;
	Qcow2Header header;

	/* must be declared after header */
	Qcow2L1Table l1_table;

	/* must be declared after header */
	Qcow2RefcountTable refcount_table;

	Qcow2ClusterAllocator cluster_allocator;
	Qcow2ClusterMapping cluster_map;

	// map meta io object with one per-queue unique ID, which is set
	// in sqe->user_data, so we can retrieve the meta io object by
	// cqe->user_data after the io is done.
	MetaArray meta_io_map;

	Qcow2MetaFlushing meta_flushing;

#ifdef DEBUG_QCOW2_META_VALIDATE
	// Debug-only maps keyed by u64 offsets, used by the two
	// validate_cluster_* hooks below
	std::unordered_map<u64, u64> cluster_use;
	std::unordered_map<u64, u64> cluster_validate_map;
#endif

	Qcow2State(const char *img_path, const struct ublksrv_dev *dev);
	virtual ~Qcow2State();

	// Geometry of the L1 table; overridable by image-type subclasses
	virtual	u32 get_l1_table_max_size();
	virtual	u64 get_l1_table_offset();

	// Geometry of the refcount table; overridable by image-type subclasses
	virtual	u32 get_refcount_table_max_size();
	virtual	u32 get_refcount_table_act_size();
	virtual	u64 get_refcount_table_offset();

	// Look up the meta IO registered at @pos for queue @qid; may return
	// nullptr if the slot was cleared by del_meta_io()
	Qcow2MappingMeta *get_meta_io(u32 qid, u32 pos) {
		return meta_io_map[qid].meta[pos];
	}

	// Unregister the meta IO at @pos for queue @qid; once the queue has
	// no live entries left, release the vector's storage
	void del_meta_io(u32 qid, u32 pos) {
		meta_io_map[qid].meta[pos] = nullptr;
		meta_io_map[qid].nr--;

		if (!meta_io_map[qid].nr)
			meta_io_map[qid].meta.clear();
	}

	// Size of the exported ublk device in bytes
	u64 get_dev_size() {
		return dev->tgt.dev_size;
	}

	unsigned get_min_flush_unit_bits(){
		return min_bs_bits;
	}

	// Queue @m for later reclaim; consumed by reclaim_slice()/kill_slices()
	void add_slice_to_free_list(Qcow2SliceMeta *m) {
		freed_slices.push_back(m);
	}

	void kill_slices(const struct ublksrv_queue *q);
	// Register @m for queue @qid and return its per-queue ID
	// (the inverse of get_meta_io(); defined out of view)
	u32 add_meta_io(u32 qid, Qcow2MappingMeta *m);
	void dump_meta();
	void reclaim_slice(Qcow2SliceMeta *m);
	void remove_slice_from_evicted_list(Qcow2SliceMeta *m);
	bool has_dirty_slice();
	u32 get_l2_slices_count();
	void shrink_cache();

#ifdef DEBUG_QCOW2_META_VALIDATE
	void validate_cluster_use(u64 host_off, u64 virt_off, u32 use);
	bool validate_cluster_map(u64 host_off, u64 virt_off);
#else
	// No-op stubs so callers need no #ifdef at every call site
	void validate_cluster_use(u64 host_off, u64 virt_off, u32 use) {}
	bool validate_cluster_map(u64 host_off, u64 virt_off) { return true;}
#endif
};
694*94c4a1e1SFrank Piva 
dev_to_qcow2state(const struct ublksrv_dev * dev)695*94c4a1e1SFrank Piva static inline Qcow2State *dev_to_qcow2state(const struct ublksrv_dev *dev)
696*94c4a1e1SFrank Piva {
697*94c4a1e1SFrank Piva 	return (Qcow2State *)dev->tgt.tgt_data;
698*94c4a1e1SFrank Piva }
699*94c4a1e1SFrank Piva 
queue_to_qcow2state(const struct ublksrv_queue * q)700*94c4a1e1SFrank Piva static inline Qcow2State *queue_to_qcow2state(const struct ublksrv_queue *q)
701*94c4a1e1SFrank Piva {
702*94c4a1e1SFrank Piva 	return (Qcow2State *)q->private_data;
703*94c4a1e1SFrank Piva }
704*94c4a1e1SFrank Piva 
/* Factory: build the Qcow2State for image @file and bind it to @dev;
 * presumably selects the proper subclass below -- see its definition */
Qcow2State *make_qcow2state(const char *file, struct ublksrv_dev *dev);
706*94c4a1e1SFrank Piva 
/* Plain qcow2 image: adds no behavior beyond the Qcow2State base */
class Qcow2StatePlain : public Qcow2State {
public:
	Qcow2StatePlain(const char *img_path, const struct ublksrv_dev *dev):
		Qcow2State(img_path, dev) {}
};
712*94c4a1e1SFrank Piva 
/* Qcow2 image with snapshots: currently identical to the base class;
 * snapshot-specific overrides would land here */
class Qcow2StateSnapshot : public Qcow2State {
public:
	Qcow2StateSnapshot(const char *img_path, const struct ublksrv_dev *dev):
		Qcow2State(img_path, dev) {}
};
718*94c4a1e1SFrank Piva 
/* Qcow2 image backed by an external data file: currently identical to
 * the base class; data-file-specific overrides would land here */
class Qcow2StateExternalDataFile : public Qcow2State {
public:
	Qcow2StateExternalDataFile(const char *img_path, const struct ublksrv_dev *dev):
		Qcow2State(img_path, dev) {}
};
724*94c4a1e1SFrank Piva 
qcow2_meta_io_done(const struct ublksrv_queue * q,const struct io_uring_cqe * cqe)725*94c4a1e1SFrank Piva static inline int qcow2_meta_io_done(const struct ublksrv_queue *q,
726*94c4a1e1SFrank Piva 		const struct io_uring_cqe *cqe)
727*94c4a1e1SFrank Piva {
728*94c4a1e1SFrank Piva 	if (!cqe)
729*94c4a1e1SFrank Piva 		return -EAGAIN;
730*94c4a1e1SFrank Piva 
731*94c4a1e1SFrank Piva 	int op = user_data_to_op(cqe->user_data);
732*94c4a1e1SFrank Piva 	int tag = user_data_to_tag(cqe->user_data);
733*94c4a1e1SFrank Piva 	u32 tgt_data = user_data_to_tgt_data(cqe->user_data);
734*94c4a1e1SFrank Piva 
735*94c4a1e1SFrank Piva 	/* plain IO's tgt_data is zero */
736*94c4a1e1SFrank Piva 	if (tgt_data == 0) {
737*94c4a1e1SFrank Piva 		ublk_err( "%s target data is zero for meta io(tag %d op %u %llx)\n",
738*94c4a1e1SFrank Piva 				__func__, tag, op, cqe->user_data);
739*94c4a1e1SFrank Piva 		return -EAGAIN;
740*94c4a1e1SFrank Piva 	}
741*94c4a1e1SFrank Piva 
742*94c4a1e1SFrank Piva 	Qcow2State *qs = queue_to_qcow2state(q);
743*94c4a1e1SFrank Piva 	/* retrieve meta data from target data part of cqe->user_data */
744*94c4a1e1SFrank Piva 	Qcow2MappingMeta *meta = qs->get_meta_io(q->q_id, tgt_data - 1);
745*94c4a1e1SFrank Piva 
746*94c4a1e1SFrank Piva 	if (cqe->res < 0)
747*94c4a1e1SFrank Piva 		ublk_err( "%s: tag %d op %d tgt_data %d meta %p userdata %d\n",
748*94c4a1e1SFrank Piva 			__func__, tag, user_data_to_op(cqe->user_data),
749*94c4a1e1SFrank Piva 			tgt_data, meta, cqe->res);
750*94c4a1e1SFrank Piva 	meta->io_done(*qs, q, cqe);
751*94c4a1e1SFrank Piva 
752*94c4a1e1SFrank Piva 	return -EAGAIN;
753*94c4a1e1SFrank Piva }
754*94c4a1e1SFrank Piva 
755*94c4a1e1SFrank Piva #endif
756