// SPDX-License-Identifier: GPL-2.0
#ifndef UBLK_QCOW2_H_
#define UBLK_QCOW2_H_

#include <string>
#include <iostream>
#include <valarray>
#include <unordered_set>
#include <unordered_map>
#include <bits/stdc++.h>
#include <exception>
#include <chrono>
#include <deque>
#include "lrucache.hpp"
#include "qcow2_format.h"
#include "qcow2_meta.h"

class Qcow2State;
class Qcow2Header;

/*
 * Design overview
 *
 * 1) code reuse:
 *    - the code can be reused as one libqcow2
 *
 *    - the internal implementation maximizes reuse of design & code
 *
 * 2) io isolation: io handling code often depends on the os, platform or
 * user choice, so io handling isolation is considered from the beginning,
 * but the focus is kept on aio style
 *
 * 3) completely aio: for read/write io and meta
 */

/* MQ support:
 *
 * 1) how to share meta data among queues? meta data has to be protected
 * to support MQ
 *
 * 2) we can start from SQ support.
 */

/*
 * Buffer management and cache design:
 *
 * 1) a fixed amount of buffer is pre-allocated & shared for all l2 cache
 * slices and refcount blk slices, just like qcow2
 *
 * 2) a fixed buffer is pre-allocated for the header, l1, refcount table and
 * other kinds of meta, but this buffer is dedicated
 *
 * Cache design (L2 table cache, refcount block cache):
 *
 * 1) why caching isn't supported for the l1/refcount table
 *
 */
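
/*
 * Sizing sketch (illustrative only; the 64KB cluster and 4KB slice values
 * below are assumptions made just for this example, not values taken from
 * this header):
 *
 *	cluster_bits = 16	-> 64KB cluster
 *	slice_size_bits = 12	-> 4KB slice buffer
 *	=> slices per cluster	: 1 << (16 - 12) = 16
 *	=> l2 entries per slice	: 4096 / 8 = 512	(one l2 entry is 8 bytes)
 *	=> virtual space mapped by one l2 slice: 512 * 64KB = 32MB
 */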

class MetaIoException: public std::exception
{
public:
	const char *what() const noexcept override { return "MetaIO exception"; }
};

class MetaUpdateException: public std::exception
{
public:
	const char *what() const noexcept override { return "MetaEntry update exception"; }
};

template <class T>
class slice_cache {
private:
	u8 slice_size_bits, cluster_size_bits, slice_virt_size_bits;

	cache::lru_cache<u64, T *> slices;
	std::unordered_map<u64, T *> evicted_slices;

	std::deque<T *> reclaimed_slices;

	int __figure_group_for_flush(Qcow2State &qs);
	int figure_group_from_dirty_list(Qcow2State &qs);
public:
	void add_slice_to_reclaim_list(T *t) {
		reclaimed_slices.push_back(t);
	}

	T *pick_slice_from_reclaim_list() {
		if (reclaimed_slices.empty())
			return nullptr;
		auto t = reclaimed_slices.front();
		reclaimed_slices.pop_front();

		return t;
	}

	unsigned get_nr_slices() {
		return 1U << (cluster_size_bits - slice_size_bits);
	}

	u64 get_slice_virt_size_bits() {
		return slice_virt_size_bits;
	}

	u64 get_slice_size_bits() {
		return slice_size_bits;
	}

	unsigned get_slices_size() {
		return slices.size();
	}

	unsigned get_evicted_slices_size() {
		return evicted_slices.size();
	}

	unsigned get_slice_idx(u64 virt_offset) {
		u32 nr_slices = 1ULL << (cluster_size_bits - slice_size_bits);
		const u64 virt_size = ((u64)nr_slices) << slice_virt_size_bits;
		u64 virt_base = virt_offset & ~(virt_size - 1);

		return (virt_offset - virt_base) >> slice_virt_size_bits;
	}
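
	/*
	 * Worked example for get_slice_idx() (illustrative values only:
	 * cluster_size_bits = 16, slice_size_bits = 12 and
	 * slice_virt_size_bits = 25 are assumed just for this sketch):
	 *
	 *	nr_slices   = 1 << (16 - 12) = 16
	 *	virt_size   = 16 << 25 = 512MB
	 *	virt_offset = 0x23456789
	 *	virt_base   = 0x20000000
	 *	=> slice idx = (0x23456789 - 0x20000000) >> 25 = 1
	 */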

	T *find_slice(u64 key, bool use_evicted_cache) {
		T *t = slices.get(key);

		if (t)
			return t;

		if (use_evicted_cache) {
			auto it = evicted_slices.find(key);

			if (it != evicted_slices.end())
				return it->second;
		}
		return nullptr;
	}

	void remove_slice_from_evicted_list(T *t) {
		auto it = evicted_slices.find(t->virt_offset());

		if (it != evicted_slices.end())
			evicted_slices.erase(it);
	}

	//called in the running flush context
	bool has_evicted_dirty_slices()
	{
		if (evicted_slices.empty())
			return false;

		for (auto it = evicted_slices.cbegin(); it !=
				evicted_slices.cend(); ++it) {
			if (it->second->get_dirty(-1))
				return true;
		}
		return false;
	}

	slice_cache(u8 slice_bits, u8 cluster_bits, u8 slice_virt_bits,
			u32 max_size);

	//only called from meta flushing code path
	T *__find_slice(u64 key, bool use_evicted_cache);
	T *alloc_slice(Qcow2State& qs, const qcow2_io_ctx_t &ioc,
		u64 virt_offset, u64 host_offset, u32 parent_idx);
	void add_slice_to_evicted_list(u64 virt_offset, T *l2);
	void dump(Qcow2State &qs);
	int figure_group_for_flush(Qcow2State &qs);
	bool has_dirty_slice(Qcow2State &qs);
	void shrink(Qcow2State &qs);
};

/* todo: remove caches in destructor */
class Qcow2ClusterMapping {
private:
	Qcow2State &state;
	slice_cache <Qcow2L2Table> cache;

	friend class Qcow2State;

	u32 cluster_bits, l2_entries_order;

	//l1/l2 entry alloc state
	//
	//an entry is added before allocating one l1/l2 entry, and removed
	//after the allocation is done
	//
	//for l1, the key is (1ULL << 63) | (offset & ~((1ULL << (cluster_bits + l2_entries_order)) - 1))
	//
	//for l2, the key is offset & ~((1ULL << cluster_bits) - 1)
	std::unordered_map<u64, u32> entry_alloc;
	u32 max_alloc_entries;
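
	/*
	 * Worked key example (illustrative only; cluster_bits = 16 and
	 * l2_entries_order = 13, i.e. a 64KB cluster, are assumed just for
	 * this example):
	 *
	 *	offset = 0x23456789
	 *	l2 key = offset & ~0xffff              = 0x23450000
	 *	l1 key = (offset & ~0x1fffffff)
	 *		 | (1ULL << 63)                = 0x8000000020000000
	 *
	 * i.e. one l1 entry covers 1 << (16 + 13) = 512MB of virtual space,
	 * and the top bit keeps l1 keys from colliding with l2 keys.
	 */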

	u64 l2_slice_virt_size() {
		return 1ULL << (cluster_bits + L2_TABLE_SLICE_BITS - 3);
	}

	u64 l2_slice_key(u64 virt_offset) {
		return ((virt_offset) & ~(l2_slice_virt_size() - 1));
	}

	u32 __entry_get_alloc_state(u64 key) {
		auto it = entry_alloc.find(key);

		if (it != entry_alloc.end())
			return it->second;
		return -1;
	}

	bool __entry_is_allocating(u64 key) {
		u32 state = __entry_get_alloc_state(key);

		return state != -1;
	}

	void __entry_mark_allocating(u64 key, u32 owner) {
		auto it = entry_alloc.find(key);
		u32 sz;

		qcow2_assert(it == entry_alloc.end());

		entry_alloc[key] = owner;

		sz = entry_alloc.size();
		if (sz > max_alloc_entries)
			max_alloc_entries = sz;
	}

	void __entry_mark_allocated(u64 key) {
		auto it = entry_alloc.find(key);

		qcow2_assert(it != entry_alloc.end());

		entry_alloc.erase(it);
	}

	u64 l1_entry_alloc_key(u64 offset) {
		return (offset & ~((1ULL << (cluster_bits +
					     l2_entries_order)) - 1)) |
				(1ULL << 63);
	}

	u64 l2_entry_alloc_key(u64 offset) {
		u64 key = (offset & ~((1ULL << cluster_bits) - 1));

		qcow2_assert(!(key & (1ULL << 63)));
		return key;
	}

	u64 entry_alloc_key(u64 offset, bool l1) {
		if (l1)
			return l1_entry_alloc_key(offset);
		return l2_entry_alloc_key(offset);
	}

	bool entry_is_allocating(u64 offset, bool l1) {
		u64 key = entry_alloc_key(offset, l1);

		return __entry_is_allocating(key);
	}

	u32 entry_get_alloc_owner(u64 offset, bool l1) {
		u64 key = entry_alloc_key(offset, l1);
		u32 state = __entry_get_alloc_state(key);

		qcow2_assert(state != -1);
		return state;
	}

	void entry_mark_allocating(u64 offset, u32 owner, bool l1) {
		u64 key = entry_alloc_key(offset, l1);

		__entry_mark_allocating(key, owner);
	}

	void entry_mark_allocated(u64 offset, bool l1) {
		u64 key = entry_alloc_key(offset, l1);

		__entry_mark_allocated(key);
	}

	Qcow2L2Table *create_and_add_l2(const qcow2_io_ctx_t &ioc, u64 offset);
	Qcow2L2Table *load_l2_slice(const qcow2_io_ctx_t &ioc, u64 offset,
			u64 l1_entry);
	int build_mapping(const qcow2_io_ctx_t &ioc,
		u64 virt_offset, Qcow2L2Table *l2, u32 idx_in_slice,
		u64 *l2_entry);
	u64 __map_cluster(const qcow2_io_ctx_t &ioc,
		Qcow2L2Table *l2, u64 offset, bool create_l2);
	Qcow2L2Table *create_l2_map(const qcow2_io_ctx_t &ioc, u64 offset,
			bool create_l2);
public:
	// refcount table shouldn't be so big
	Qcow2ClusterMapping(Qcow2State &qs);

	//the main logic for mapping one cluster:
	//create the l2 table and set up the mapping if 'create_l2' is true
	//and no l2 table is present for this 'offset'
	u64 map_cluster(const qcow2_io_ctx_t &ioc, u64 offset, bool create_l2);
	int figure_group_from_l1_table();

	Qcow2L2Table* __find_slice(u64 key, bool use_dirty=true);

	u64 l1_idx(u64 offset) {
		return offset >> (cluster_bits + l2_entries_order);
	}

	u64 l2_idx(u64 offset) {
		return (offset >> cluster_bits) &
			((1ULL << l2_entries_order) - 1);
	}

	bool has_evicted_dirty_slices()
	{
		return cache.has_evicted_dirty_slices();
	}

	void dump_meta();
};

enum QCOW2_CLUSTER_USE {
	L2_TABLE = 0,
	REFCOUNT_BLK = 1,
	DATA = 2,
};

/*
 * Think about the lifetime issue: is it possible that one state object is
 * removed while it is still being used somewhere?
 *
 * So far the simple rule is that the state can only be removed after
 * its state becomes QCOW2_ALLOC_ZEROED.
 *
 * So unless you are absolutely sure it is safe, don't call
 * get_cluster_state() directly.
 */
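
/*
 * Typical lifecycle sketch (an illustrative reading of the helpers declared
 * in Qcow2ClusterAllocator below, not a definitive call graph; the exact
 * call sites live in the implementation files):
 *
 *	alloc_cluster_started()  -> QCOW2_ALLOC_STARTED
 *	alloc_cluster_zeroing()  -> QCOW2_ALLOC_ZEROING
 *	alloc_cluster_zeroed()   -> QCOW2_ALLOC_ZEROED
 *	alloc_cluster_done()     -> QCOW2_ALLOC_DONE (mapping set up)
 */
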
class Qcow2ClusterState {
#define QCOW2_ALLOC_STARTED	0	//cluster allocated in ram
#define QCOW2_ALLOC_ZEROING	1	//IO for zeroing this cluster is submitted
#define QCOW2_ALLOC_ZEROED	2	//cluster zeroed
#define QCOW2_ALLOC_DONE	3	//mapping setup
private:
	u8 state;
	u8 purpose;
	IOWaiters io_waiters;

public:
	Qcow2ClusterState() {
		state = QCOW2_ALLOC_STARTED;
	}

	Qcow2ClusterState(u8 s, u8 p) {
		state = s;
		purpose = p;
	}

	//called after the cluster is allocated from ram
	u8 get_state() {
		return state;
	}

	void set_state(u8 s) {
		state = s;
	}

	u8 get_purpose() {
		return purpose;
	}

	void add_waiter(unsigned tag) {
		io_waiters.add_waiter(tag);
	}

	void wakeup_all(const struct ublksrv_queue *q, unsigned my_tag) {
		io_waiters.wakeup_all(q, my_tag);
	}
};

/* todo: remove caches in destructor */
class Qcow2ClusterAllocator {
private:
	Qcow2State &state;
	s32 slice_idx;
	u8  table_entry_virt_size_bits;
	u64 alloc_cnt;
	slice_cache <Qcow2RefcountBlock> cache;

	u32 refcount_block_entries();
	void allocate_refcount_blk(const qcow2_io_ctx_t &ioc, s32 idx);

	friend class Qcow2State;

public:
	//key is the cluster start offset, val is its allocation status
	std::unordered_map<u64, Qcow2ClusterState *> alloc_state;
	u32 max_alloc_states;
	u64 max_physical_size;

	// refcount table shouldn't be so big
	Qcow2ClusterAllocator(Qcow2State &qs);

	//called after refcount table is loaded
	void setup();
	u64 allocate_cluster(const qcow2_io_ctx_t &ioc);
	u64 refcount_blk_key(const Qcow2RefcountBlock *rb);
	void dump_meta();
	int figure_group_from_refcount_table();

	Qcow2RefcountBlock* __find_slice(u64 key);

	bool has_evicted_dirty_slices()
	{
		return cache.has_evicted_dirty_slices();
	}

	/* the following helpers are for implementing soft update */

	//don't refer to one state object after one cycle of coroutine wait &
	//wakeup, and the caller has to check the return value since it may
	//be nullptr
	Qcow2ClusterState *get_cluster_state(u64 cluster_offset) {
		auto it = alloc_state.find(cluster_offset);

		if (it == alloc_state.end())
			return nullptr;

		return it->second;
	}
	//the zeroing io may return -EAGAIN, so we need to
	//reset its state for re-issuing the zeroing IO
	bool alloc_cluster_reset(u64 cluster_offset) {
		auto it = alloc_state.find(cluster_offset);

		if (it == alloc_state.end())
			return false;

		//maybe the cluster has been zeroed, so double check
		if (it->second->get_state() < QCOW2_ALLOC_ZEROED) {
			it->second->set_state(QCOW2_ALLOC_STARTED);
			return true;
		}
		return false;
	}

	//called after the cluster is allocated from ram
	void alloc_cluster_started(const qcow2_io_ctx_t &ioc,
			u64 cluster_offset, u8 purpose);

	//check if the allocated cluster is zeroed
	bool alloc_cluster_is_zeroed(u64 cluster_offset) {
		Qcow2ClusterState * cs = get_cluster_state(cluster_offset);

		return cs == nullptr || cs->get_state() >= QCOW2_ALLOC_ZEROED;
	}

	//called after IO for zeroing this cluster is started
	void alloc_cluster_zeroing(const qcow2_io_ctx_t &ioc, u64 cluster_offset);

	//called after the cluster is zeroed
	void alloc_cluster_zeroed(const struct ublksrv_queue *q,
			int tag, u64 cluster_offset);

	//called after the cluster is zeroed and the associated mapping is updated
	void alloc_cluster_done(const qcow2_io_ctx_t &ioc, u64 cluster_offset);

	//add one io waiter for this cluster's in-progress allocation
	void alloc_cluster_add_waiter(const qcow2_io_ctx_t &ioc,
			u64 cluster_offset);
};

class Qcow2Image {
private:
	std::string	fpath;
public:
	int fd;
	Qcow2Image(const char *path);
	~Qcow2Image();
};

enum qcow2_meta_flush {
	IDLE,
	PREP_WRITE_SLICES, //all slices are added to list for flush
	ZERO_MY_CLUSTER,
	WAIT,	//valid only for mapping table, wait for refcount table flushing done
	WRITE_SLICES,
	WRITE_TOP,
	DONE,
};

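/*
 * Rough flushing progression, as suggested by the enum order and the
 * per-state handlers in MetaFlushingState below (illustrative only; the
 * authoritative transitions live in the flushing implementation):
 *
 *	IDLE -> PREP_WRITE_SLICES -> ZERO_MY_CLUSTER -> WAIT (mapping table
 *	only, waits for the refcount side) -> WRITE_SLICES -> WRITE_TOP -> DONE
 */
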
class MetaFlushingState {
private:
	// for flushing the slices that the current parent_idx depends on,
	// and for handling the WRITE_SLICES state
	//
	//any slice depended on by the current parent_idx is added to this
	//list, and it is removed after its flushing is done
	//
	//once the list becomes empty, the state is switched to
	//WRITE_TOP
	std::vector <Qcow2SliceMeta *> slices_to_flush;
	std::vector <Qcow2SliceMeta *> slices_in_flight;
	unsigned state;
	int parent_blk_idx;
	int parent_entry_idx;
	bool mapping;

	void del_meta_from_list(std::vector <Qcow2SliceMeta *> &v,
		const Qcow2SliceMeta *t);

	void __prep_write_slice(Qcow2State &qs, const struct ublksrv_queue *q);

	void __zero_my_cluster(Qcow2State &qs, const struct ublksrv_queue *q);
	co_io_job __zero_my_cluster_co(Qcow2State &qs,
		const struct ublksrv_queue *q, struct ublk_io_tgt *io, int tag,
		Qcow2SliceMeta *m);

	void __write_slices(Qcow2State &qs, const struct ublksrv_queue *q);
	co_io_job __write_slice_co(Qcow2State &qs,
		const struct ublksrv_queue *q, Qcow2SliceMeta *m,
		struct ublk_io_tgt *io, int tag);

	void __write_top(Qcow2State &qs, const struct ublksrv_queue *q);
	co_io_job  __write_top_co(Qcow2State &qs, const struct ublksrv_queue *q,
			struct ublk_io_tgt *io, int tag);

	void __done(Qcow2State &qs, const struct ublksrv_queue *q);
	bool __need_flush(int queued);
	void mark_no_update();
public:
	Qcow2TopTable &top;
	unsigned slice_dirtied;
	std::chrono::system_clock::time_point last_flush;

	unsigned get_state() const {
		return state;
	}
	void set_state(u32 s) {
		ublk_dbg(UBLK_DBG_QCOW2_FLUSH, "%s: map %d slice_dirtied %u parent_blk_idx %d"
				" parent_entry_idx %d %d->%d to_flush %zd in_flight %zd\n",
				__func__, mapping, slice_dirtied,
				parent_blk_idx, parent_entry_idx, state,
				s, slices_to_flush.size(),
				slices_in_flight.size());
		state = s;
	}

	MetaFlushingState(Qcow2TopTable &t, bool is_mapping);
	void slice_is_done(const Qcow2SliceMeta*);
	void add_slice_to_flush(Qcow2SliceMeta *m);
	void run_flush(Qcow2State &qs, const struct ublksrv_queue *q,
			int top_blk_idx);
	bool need_flush(Qcow2State &qs, int *top_idx, unsigned queued);
	void dump(const char *func, int line) const;
	int calc_refcount_dirty_blk_range(Qcow2State& qs,
			int *refcnt_blk_start, int *refcnt_blk_end);
};

/*
 * For any kind of meta flushing, one tag or io slot is required,
 * so the meta flushing class starts with a meta tag allocator.
 *
 * Meta data updating is never a foreground task, so if we run out
 * of tags, just wait until one tag is released.
 */
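
/*
 * Tag usage sketch (illustrative only; 'flushing' stands for a
 * Qcow2MetaFlushing instance, and the real callers plus error handling
 * live in the flushing implementation):
 *
 *	int tag = flushing.alloc_tag(q);	// may need to wait for a free tag
 *	... submit the meta IO using this tag ...
 *	flushing.free_tag(q, tag);		// release once the IO completes
 */
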
class Qcow2MetaFlushing {
private:
	std::vector <bool> tags;

	int refcnt_blk_start;
	int refcnt_blk_end;

	bool handle_mapping_dependency_start_end(Qcow2State *qs,
			const struct ublksrv_queue *q);
	void handle_mapping_dependency(Qcow2State *qs,
			const struct ublksrv_queue *q);
public:
	Qcow2State &state;

	MetaFlushingState mapping_stat;
	MetaFlushingState refcount_stat;

	void inc_dirtied_slice(bool mapping) {
		if (mapping)
			mapping_stat.slice_dirtied += 1;
		else
			refcount_stat.slice_dirtied += 1;
	}

	void dec_dirtied_slice(bool mapping) {
		if (mapping)
			mapping_stat.slice_dirtied -= 1;
		else
			refcount_stat.slice_dirtied -= 1;
	}

	Qcow2MetaFlushing(Qcow2State &qs);
	void dump();
	int alloc_tag(const struct ublksrv_queue *q);
	void free_tag(const struct ublksrv_queue *q, int tag);
	void run_flush(const struct ublksrv_queue *q, int queued);
	bool is_flushing();
};

class Qcow2State {
private:
	std::vector <Qcow2SliceMeta *> freed_slices;
public:
	const struct ublksrv_ctrl_dev_info *dev_info;
	unsigned min_bs_bits;
	struct meta_mapping {
		int nr;
		std::vector <Qcow2MappingMeta *> meta;
	};
	typedef std::valarray<struct meta_mapping> MetaArray;

	const struct ublksrv_dev *dev;
	Qcow2Image img;
	Qcow2Header header;

	/* must be declared after header */
	Qcow2L1Table l1_table;

	/* must be declared after header */
	Qcow2RefcountTable refcount_table;

	Qcow2ClusterAllocator cluster_allocator;
	Qcow2ClusterMapping cluster_map;

	// map each meta io object to one per-queue unique ID, which is set
	// in sqe->user_data, so we can retrieve the meta io object from
	// cqe->user_data after the io is done
	MetaArray meta_io_map;
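
	//note: qcow2_meta_io_done() at the end of this header treats
	//tgt_data == 0 as plain IO and looks the meta object up via
	//get_meta_io(qid, tgt_data - 1), i.e. the stored ID is the map
	//index plus one (an illustrative reading of that helper)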

	Qcow2MetaFlushing meta_flushing;

#ifdef DEBUG_QCOW2_META_VALIDATE
	std::unordered_map<u64, u64> cluster_use;
	std::unordered_map<u64, u64> cluster_validate_map;
#endif

	Qcow2State(const char *img_path, const struct ublksrv_dev *dev);
	virtual ~Qcow2State();

	virtual u32 get_l1_table_max_size();
	virtual u64 get_l1_table_offset();

	virtual u32 get_refcount_table_max_size();
	virtual u32 get_refcount_table_act_size();
	virtual u64 get_refcount_table_offset();

	Qcow2MappingMeta *get_meta_io(u32 qid, u32 pos) {
		return meta_io_map[qid].meta[pos];
	}

	void del_meta_io(u32 qid, u32 pos) {
		meta_io_map[qid].meta[pos] = nullptr;
		meta_io_map[qid].nr--;

		if (!meta_io_map[qid].nr)
			meta_io_map[qid].meta.clear();
	}

	u64 get_dev_size() {
		return dev->tgt.dev_size;
	}

	unsigned get_min_flush_unit_bits() {
		return min_bs_bits;
	}

	void add_slice_to_free_list(Qcow2SliceMeta *m) {
		freed_slices.push_back(m);
	}

	void kill_slices(const struct ublksrv_queue *q);
	u32 add_meta_io(u32 qid, Qcow2MappingMeta *m);
	void dump_meta();
	void reclaim_slice(Qcow2SliceMeta *m);
	void remove_slice_from_evicted_list(Qcow2SliceMeta *m);
	bool has_dirty_slice();
	u32 get_l2_slices_count();
	void shrink_cache();

#ifdef DEBUG_QCOW2_META_VALIDATE
	void validate_cluster_use(u64 host_off, u64 virt_off, u32 use);
	bool validate_cluster_map(u64 host_off, u64 virt_off);
#else
	void validate_cluster_use(u64 host_off, u64 virt_off, u32 use) {}
	bool validate_cluster_map(u64 host_off, u64 virt_off) { return true; }
#endif
};

static inline Qcow2State *dev_to_qcow2state(const struct ublksrv_dev *dev)
{
	return (Qcow2State *)dev->tgt.tgt_data;
}

static inline Qcow2State *queue_to_qcow2state(const struct ublksrv_queue *q)
{
	return (Qcow2State *)q->private_data;
}

Qcow2State *make_qcow2state(const char *file, struct ublksrv_dev *dev);

class Qcow2StatePlain : public Qcow2State {
public:
	Qcow2StatePlain(const char *img_path, const struct ublksrv_dev *dev):
		Qcow2State(img_path, dev) {}
};

class Qcow2StateSnapshot : public Qcow2State {
public:
	Qcow2StateSnapshot(const char *img_path, const struct ublksrv_dev *dev):
		Qcow2State(img_path, dev) {}
};

class Qcow2StateExternalDataFile : public Qcow2State {
public:
	Qcow2StateExternalDataFile(const char *img_path, const struct ublksrv_dev *dev):
		Qcow2State(img_path, dev) {}
};

static inline int qcow2_meta_io_done(const struct ublksrv_queue *q,
		const struct io_uring_cqe *cqe)
{
	if (!cqe)
		return -EAGAIN;

	int op = user_data_to_op(cqe->user_data);
	int tag = user_data_to_tag(cqe->user_data);
	u32 tgt_data = user_data_to_tgt_data(cqe->user_data);

	/* plain IO's tgt_data is zero */
	if (tgt_data == 0) {
		ublk_err("%s: target data is zero for meta io (tag %d op %u %llx)\n",
				__func__, tag, op, cqe->user_data);
		return -EAGAIN;
	}

	Qcow2State *qs = queue_to_qcow2state(q);
	/* retrieve the meta object from the target data part of cqe->user_data */
	Qcow2MappingMeta *meta = qs->get_meta_io(q->q_id, tgt_data - 1);

	if (cqe->res < 0)
		ublk_err("%s: tag %d op %d tgt_data %d meta %p res %d\n",
			__func__, tag, user_data_to_op(cqe->user_data),
			tgt_data, meta, cqe->res);
	meta->io_done(*qs, q, cqe);

	return -EAGAIN;
}

#endif