1*94c4a1e1SFrank Piva // SPDX-License-Identifier: GPL-2.0
2*94c4a1e1SFrank Piva #ifndef UBLK_QCOW2_H_
3*94c4a1e1SFrank Piva #define UBLK_QCOW2_H_
4*94c4a1e1SFrank Piva
5*94c4a1e1SFrank Piva #include <string>
6*94c4a1e1SFrank Piva #include <iostream>
7*94c4a1e1SFrank Piva #include <valarray>
8*94c4a1e1SFrank Piva #include <unordered_set>
9*94c4a1e1SFrank Piva #include <unordered_map>
10*94c4a1e1SFrank Piva #include <bits/stdc++.h>
11*94c4a1e1SFrank Piva #include <exception>
12*94c4a1e1SFrank Piva #include <chrono>
13*94c4a1e1SFrank Piva #include <deque>
14*94c4a1e1SFrank Piva #include "lrucache.hpp"
15*94c4a1e1SFrank Piva #include "qcow2_format.h"
16*94c4a1e1SFrank Piva #include "qcow2_meta.h"
17*94c4a1e1SFrank Piva
18*94c4a1e1SFrank Piva class Qcow2State;
19*94c4a1e1SFrank Piva class Qcow2Header;
20*94c4a1e1SFrank Piva
21*94c4a1e1SFrank Piva /*
22*94c4a1e1SFrank Piva * Design overview
23*94c4a1e1SFrank Piva *
24*94c4a1e1SFrank Piva * 1) code reuse:
 * - the code is structured so it can be reused as one libqcow2 library
26*94c4a1e1SFrank Piva *
27*94c4a1e1SFrank Piva * - internal implementation maximize reusing design & code
28*94c4a1e1SFrank Piva *
29*94c4a1e1SFrank Piva * 2) io isolation: io handling code often depends on os or platform or
30*94c4a1e1SFrank Piva * user choice, so io handling isolation is considered from the beginning;
31*94c4a1e1SFrank Piva * but focus on aio style
32*94c4a1e1SFrank Piva *
33*94c4a1e1SFrank Piva * 3) completely aio: for read/write io and meta
34*94c4a1e1SFrank Piva */
35*94c4a1e1SFrank Piva
36*94c4a1e1SFrank Piva /* MQ support:
37*94c4a1e1SFrank Piva *
 * 1) how to share meta data among queues? meta data has to be protected
 * for supporting MQ
40*94c4a1e1SFrank Piva *
41*94c4a1e1SFrank Piva * 2) we can start from SQ support.
42*94c4a1e1SFrank Piva */
43*94c4a1e1SFrank Piva
44*94c4a1e1SFrank Piva /*
45*94c4a1e1SFrank Piva * Buffer management and cache design:
46*94c4a1e1SFrank Piva *
47*94c4a1e1SFrank Piva * 1) fixed amount of buffer is pre-allocated & shared for all l2 cache slice,
48*94c4a1e1SFrank Piva * refcount blk, just like qcow2
49*94c4a1e1SFrank Piva *
50*94c4a1e1SFrank Piva * 2) fixed buffer is pre-allocated for header, l1, refcount table and other
51*94c4a1e1SFrank Piva * kind of meta, but the buffer is dedicated
52*94c4a1e1SFrank Piva *
53*94c4a1e1SFrank Piva * Cache design(L2 table cache, refcount block cache):
54*94c4a1e1SFrank Piva *
 * 1) why caching isn't supported for the l1/refcount tables
56*94c4a1e1SFrank Piva *
57*94c4a1e1SFrank Piva */
58*94c4a1e1SFrank Piva
/*
 * Thrown when a meta data IO (load/store) fails.
 *
 * what() must be 'const noexcept' to actually override
 * std::exception::what(); the previous non-const version merely hid the
 * base method, so code catching 'const std::exception &' got the
 * generic implementation-defined message instead of ours.
 */
class MetaIoException: public std::exception
{
public:
	const char *what() const noexcept override {
		return "MetaIO exception";
	}
};
64*94c4a1e1SFrank Piva
/*
 * Thrown when updating one meta entry fails.
 *
 * what() must be 'const noexcept' to actually override
 * std::exception::what(); the previous non-const version merely hid the
 * base method, so code catching 'const std::exception &' got the
 * generic implementation-defined message instead of ours.
 */
class MetaUpdateException: public std::exception
{
public:
	const char *what() const noexcept override {
		return "MetaEntry update exception";
	}
};
70*94c4a1e1SFrank Piva
/*
 * slice_cache - LRU cache of meta slices (T is Qcow2L2Table or
 * Qcow2RefcountBlock).
 *
 * A slice is one fixed-size piece (1 << slice_size_bits bytes) of a
 * meta cluster, covering (1 << slice_virt_size_bits) bytes of virtual
 * disk range. Slices live in the LRU 'slices' map; once pushed out of
 * the LRU they are parked in 'evicted_slices' until flushed, and
 * finally queued on 'reclaimed_slices' for buffer reuse.
 */
template <class T>
class slice_cache {
private:
	// log2 of: slice buffer size, meta cluster size, and virtual disk
	// range covered by one slice
	u8 slice_size_bits, cluster_size_bits, slice_virt_size_bits;

	// main LRU cache, keyed by the slice's virtual offset
	cache::lru_cache<u64, T *> slices;
	// slices evicted from the LRU but not reclaimed yet
	std::unordered_map<u64, T *> evicted_slices;

	// slices whose buffer can be reused for new allocation
	std::deque<T *> reclaimed_slices;

	int __figure_group_for_flush(Qcow2State &qs);
	int figure_group_from_dirty_list(Qcow2State &qs);
public:
	// make 't' available for reuse by a future slice allocation
	void add_slice_to_reclaim_list(T *t) {
		reclaimed_slices.push_back(t);
	}

	// pop one reusable slice in FIFO order, or nullptr if none pending
	T *pick_slice_from_reclaim_list() {
		if (reclaimed_slices.empty())
			return nullptr;
		auto t = reclaimed_slices.front();
		reclaimed_slices.pop_front();

		return t;
	}

	// number of slices making up one meta cluster
	unsigned get_nr_slices() {
		return 1U << (cluster_size_bits - slice_size_bits);
	}

	u64 get_slice_virt_size_bits() {
		return slice_virt_size_bits;
	}

	u64 get_slice_size_bits() {
		return slice_size_bits;
	}

	unsigned get_slices_size() {
		return slices.size();
	}

	unsigned get_evicted_slices_size() {
		return evicted_slices.size();
	}

	// index (within its meta cluster) of the slice covering
	// 'virt_offset': offset into the cluster's virtual range, divided
	// by the per-slice virtual range
	unsigned get_slice_idx(u64 virt_offset) {
		u32 nr_slices = 1ULL << (cluster_size_bits - slice_size_bits);
		const u64 virt_size = ((u64)nr_slices) << slice_virt_size_bits;
		u64 virt_base = virt_offset & ~(virt_size - 1);

		return (virt_offset - virt_base) >> slice_virt_size_bits;
	}

	// look 'key' up in the LRU cache, optionally falling back to the
	// evicted-but-not-reclaimed slices
	T *find_slice(u64 key, bool use_evicted_cache) {
		T *t = slices.get(key);

		if (t)
			return t;

		if (use_evicted_cache) {
			auto it = evicted_slices.find(key);

			if (it != evicted_slices.end())
				return it->second;
		}
		return nullptr;
	}

	// drop 't' from the evicted map; no-op if it isn't there
	void remove_slice_from_evicted_list(T *t) {
		auto it = evicted_slices.find(t->virt_offset());

		if (it != evicted_slices.end())
			evicted_slices.erase(it);
	}

	//called in running flush context
	bool has_evicted_dirty_slices()
	{
		if (evicted_slices.empty())
			return false;

		for (auto it = evicted_slices.cbegin(); it !=
				evicted_slices.cend(); ++it) {
			if (it->second->get_dirty(-1))
				return true;
		}
		return false;
	}

	slice_cache(u8 slice_bits, u8 cluster_bits, u8 slice_virt_bits,
			u32 max_size);

	//only called from meta flushing code path
	T *__find_slice(u64 key, bool use_evicted_cache);
	T *alloc_slice(Qcow2State& qs, const qcow2_io_ctx_t &ioc,
			u64 virt_offset, u64 host_offset, u32 parent_idx);
	void add_slice_to_evicted_list(u64 virt_offset, T *l2);
	void dump(Qcow2State &qs);
	int figure_group_for_flush(Qcow2State &qs);
	bool has_dirty_slice(Qcow2State &qs);
	void shrink(Qcow2State &qs);
};
174*94c4a1e1SFrank Piva
175*94c4a1e1SFrank Piva /* todo: remove caches in destructor */
/* todo: remove caches in destructor */
/*
 * Qcow2ClusterMapping - virtual offset -> host offset translation via
 * the qcow2 l1/l2 tables; l2 table slices are kept in a slice_cache.
 */
class Qcow2ClusterMapping {
private:
	Qcow2State &state;
	slice_cache <Qcow2L2Table> cache;	//l2 slice cache

	friend class Qcow2State;

	u32 cluster_bits, l2_entries_order;

	//l1/l2 entry alloc state
	//
	//added before allocating one l1/l2 entry, and freed after
	//the allocation is done
	//
	//For l1, the key is (1ULL << 63) | offset & ~((1ULL << (cluster_bits + l2 entries bits)) - 1)
	//
	//for l2, the key is offset & ~((1ULL << cluster_bits) - 1)
	//
	//value is the allocation owner (see entry_get_alloc_owner())
	std::unordered_map<u64, u32> entry_alloc;
	u32 max_alloc_entries;

	//virtual disk range covered by one l2 slice: each 8-byte l2 entry
	//maps one cluster, hence the '- 3'
	u64 l2_slice_virt_size() {
		return 1ULL << (cluster_bits + L2_TABLE_SLICE_BITS - 3);
	}

	//cache key of the l2 slice covering 'virt_offset'
	u64 l2_slice_key(u64 virt_offset) {
		return ((virt_offset) & ~(l2_slice_virt_size() - 1));
	}

	//owner of the in-flight allocation behind 'key', or (u32)-1 when
	//no allocation is in progress
	u32 __entry_get_alloc_state(u64 key) {
		auto it = entry_alloc.find(key);

		if (it != entry_alloc.end())
			return it->second;
		return -1;	//converted to (u32)-1, the "none" sentinel
	}

	bool __entry_is_allocating(u64 key) {
		//NOTE(review): local 'state' shadows the member reference
		u32 state = __entry_get_alloc_state(key);

		return state != -1;
	}

	//record that 'owner' started allocating the entry behind 'key';
	//asserts the key isn't being allocated already
	void __entry_mark_allocating(u64 key, u32 owner) {
		auto it = entry_alloc.find(key);
		u32 sz;

		qcow2_assert(it == entry_alloc.end());

		entry_alloc[key] = owner;

		//track high watermark of concurrent entry allocations
		sz = entry_alloc.size();
		if (sz > max_alloc_entries)
			max_alloc_entries = sz;
	}

	//allocation done: drop the in-flight record for 'key'
	void __entry_mark_allocated(u64 key) {
		auto it = entry_alloc.find(key);

		qcow2_assert(it != entry_alloc.end());

		entry_alloc.erase(it);
	}

	//l1 keys have bit63 set so they never collide with l2 keys
	u64 l1_entry_alloc_key(u64 offset) {
		return (offset & ~((1ULL << (cluster_bits +
				l2_entries_order)) - 1)) |
				(1ULL << 63);
	}

	u64 l2_entry_alloc_key(u64 offset) {
		u64 key = (offset & ~((1ULL << cluster_bits) - 1));

		qcow2_assert(!(key & (1ULL << 63)));
		return key;
	}

	u64 entry_alloc_key(u64 offset, bool l1) {
		if (l1)
			return l1_entry_alloc_key(offset);
		return l2_entry_alloc_key(offset);
	}

	bool entry_is_allocating(u64 offset, bool l1) {
		u64 key = entry_alloc_key(offset, l1);

		return __entry_is_allocating(key);
	}

	//asserts that an allocation is actually in progress
	u32 entry_get_alloc_owner(u64 offset, bool l1) {
		u64 key = entry_alloc_key(offset, l1);
		u32 state = __entry_get_alloc_state(key);

		qcow2_assert(state != -1);
		return state;
	}

	void entry_mark_allocating(u64 offset, u32 owner, bool l1) {
		u64 key = entry_alloc_key(offset, l1);

		__entry_mark_allocating(key, owner);
	}

	void entry_mark_allocated(u64 offset, bool l1) {
		u64 key = entry_alloc_key(offset, l1);

		__entry_mark_allocated(key);
	}

	Qcow2L2Table *create_and_add_l2(const qcow2_io_ctx_t &ioc, u64 offset);
	Qcow2L2Table *load_l2_slice(const qcow2_io_ctx_t &ioc, u64 offset,
			u64 l1_entry);
	int build_mapping(const qcow2_io_ctx_t &ioc,
			u64 virt_offset, Qcow2L2Table *l2, u32 idx_in_slice,
			u64 *l2_entry);
	u64 __map_cluster(const qcow2_io_ctx_t &ioc,
			Qcow2L2Table *l2, u64 offset, bool create_l2);
	Qcow2L2Table *create_l2_map(const qcow2_io_ctx_t &ioc, u64 offset,
			bool create_l2);
public:
	// refcount table shouldn't be so big
	Qcow2ClusterMapping(Qcow2State &qs);

	//the main logic for mapping cluster
	//create l2 and setup the mapping if 'create_l2' is true & l2 isn't
	//present for this 'offset'
	u64 map_cluster(const qcow2_io_ctx_t &ioc, u64 offset, bool create_l2);
	int figure_group_from_l1_table();

	Qcow2L2Table* __find_slice(u64 key, bool use_dirty=true);

	//index into the l1 table for 'offset'
	u64 l1_idx(u64 offset) {
		return offset >> (cluster_bits + l2_entries_order);
	}

	//index into the (whole) l2 table for 'offset'
	u64 l2_idx(u64 offset) {
		return (offset >> cluster_bits) &
			((1ULL << l2_entries_order) - 1);
	}

	bool has_evicted_dirty_slices()
	{
		return cache.has_evicted_dirty_slices();
	}

	void dump_meta();
};
322*94c4a1e1SFrank Piva
//what one allocated cluster will be used for
enum QCOW2_CLUSTER_USE {
	L2_TABLE = 0,
	REFCOUNT_BLK = 1,
	DATA = 2,
};
328*94c4a1e1SFrank Piva
329*94c4a1e1SFrank Piva /*
330*94c4a1e1SFrank Piva * Think about lifetime issue. Is it possible that one state is removed
331*94c4a1e1SFrank Piva * but it is being used somewhere?
332*94c4a1e1SFrank Piva *
333*94c4a1e1SFrank Piva * So far the simple rule is that the state can only be removed after
334*94c4a1e1SFrank Piva * its state becomes QCOW2_ALLOC_ZEROED.
335*94c4a1e1SFrank Piva *
336*94c4a1e1SFrank Piva * So except for being absolute safety, don't call get_cluster_state()
337*94c4a1e1SFrank Piva * directly.
338*94c4a1e1SFrank Piva */
339*94c4a1e1SFrank Piva class Qcow2ClusterState {
340*94c4a1e1SFrank Piva #define QCOW2_ALLOC_STARTED 0 //cluster allocated in ram
341*94c4a1e1SFrank Piva #define QCOW2_ALLOC_ZEROING 1 //IO for zeroing this cluster is submitted
342*94c4a1e1SFrank Piva #define QCOW2_ALLOC_ZEROED 2 //cluster zeroed
343*94c4a1e1SFrank Piva #define QCOW2_ALLOC_DONE 3 //mapping setup
344*94c4a1e1SFrank Piva private:
345*94c4a1e1SFrank Piva u8 state;
346*94c4a1e1SFrank Piva u8 purpose;
347*94c4a1e1SFrank Piva IOWaiters io_waiters;
348*94c4a1e1SFrank Piva
349*94c4a1e1SFrank Piva public:
Qcow2ClusterState()350*94c4a1e1SFrank Piva Qcow2ClusterState() {
351*94c4a1e1SFrank Piva state = QCOW2_ALLOC_STARTED;
352*94c4a1e1SFrank Piva }
353*94c4a1e1SFrank Piva
Qcow2ClusterState(u8 s,u8 p)354*94c4a1e1SFrank Piva Qcow2ClusterState(u8 s, u8 p) {
355*94c4a1e1SFrank Piva state = s;
356*94c4a1e1SFrank Piva purpose = p;
357*94c4a1e1SFrank Piva }
358*94c4a1e1SFrank Piva
359*94c4a1e1SFrank Piva //called after the cluster is allocated from ram
get_state()360*94c4a1e1SFrank Piva u8 get_state() {
361*94c4a1e1SFrank Piva return state;
362*94c4a1e1SFrank Piva }
363*94c4a1e1SFrank Piva
set_state(u8 s)364*94c4a1e1SFrank Piva void set_state(u8 s) {
365*94c4a1e1SFrank Piva state = s;
366*94c4a1e1SFrank Piva }
367*94c4a1e1SFrank Piva
get_purpose()368*94c4a1e1SFrank Piva u8 get_purpose() {
369*94c4a1e1SFrank Piva return purpose;
370*94c4a1e1SFrank Piva }
371*94c4a1e1SFrank Piva
add_waiter(unsigned tag)372*94c4a1e1SFrank Piva void add_waiter(unsigned tag) {
373*94c4a1e1SFrank Piva io_waiters.add_waiter(tag);
374*94c4a1e1SFrank Piva }
375*94c4a1e1SFrank Piva
wakeup_all(const struct ublksrv_queue * q,unsigned my_tag)376*94c4a1e1SFrank Piva void wakeup_all(const struct ublksrv_queue *q, unsigned my_tag) {
377*94c4a1e1SFrank Piva io_waiters.wakeup_all(q, my_tag);
378*94c4a1e1SFrank Piva }
379*94c4a1e1SFrank Piva };
380*94c4a1e1SFrank Piva
381*94c4a1e1SFrank Piva /* todo: remove caches in destructor */
/* todo: remove caches in destructor */
/*
 * Qcow2ClusterAllocator - allocates clusters from the image; refcount
 * block slices are kept in a slice_cache, and per-cluster allocation
 * progress is tracked in 'alloc_state' for implementing soft update.
 */
class Qcow2ClusterAllocator {
private:
	Qcow2State &state;
	s32 slice_idx;
	u8 table_entry_virt_size_bits;
	u64 alloc_cnt;
	slice_cache <Qcow2RefcountBlock> cache;	//refcount blk slice cache

	u32 refcount_block_entries();
	void allocate_refcount_blk(const qcow2_io_ctx_t &ioc, s32 idx);

	friend class Qcow2State;

public:
	//key is cluster start offset, val is its allocate status
	std::unordered_map<u64, Qcow2ClusterState *> alloc_state;
	u32 max_alloc_states;
	u64 max_physical_size;

	// refcount table shouldn't be so big
	Qcow2ClusterAllocator(Qcow2State &qs);

	//called after refcount table is loaded
	void setup();
	u64 allocate_cluster(const qcow2_io_ctx_t &ioc);
	u64 refcount_blk_key(const Qcow2RefcountBlock *rb);
	void dump_meta();
	int figure_group_from_refcount_table();

	Qcow2RefcountBlock* __find_slice(u64 key);

	bool has_evicted_dirty_slices()
	{
		return cache.has_evicted_dirty_slices();
	}

	/* the following helpers are for implementing soft update */

	//don't refer to one state after one cycle of coroutine wait &
	//wakeup, and caller has to check if the return value is nullptr
	Qcow2ClusterState *get_cluster_state(u64 cluster_offset) {
		auto it = alloc_state.find(cluster_offset);

		if (it == alloc_state.end())
			return nullptr;

		return it->second;
	}

	//the zeroing io may return -EAGAIN, then we need to
	//reset its state for re-issuing zeroing IO
	//
	//returns true if the state was reset, false if the cluster has
	//already reached QCOW2_ALLOC_ZEROED (or isn't tracked at all)
	bool alloc_cluster_reset(u64 cluster_offset) {
		auto it = alloc_state.find(cluster_offset);

		if (it == alloc_state.end())
			return false;

		//maybe the cluster has been zeroed, so double check
		if (it->second->get_state() < QCOW2_ALLOC_ZEROED) {
			it->second->set_state(QCOW2_ALLOC_STARTED);
			return true;
		}
		return false;
	}

	//called after the cluster is allocated from ram
	void alloc_cluster_started(const qcow2_io_ctx_t &ioc,
			u64 cluster_offset, u8 purpose);

	//check if the allocated cluster is zeroed; untracked clusters
	//count as zeroed
	bool alloc_cluster_is_zeroed(u64 cluster_offset) {
		Qcow2ClusterState * cs = get_cluster_state(cluster_offset);

		return cs == nullptr || cs->get_state() >= QCOW2_ALLOC_ZEROED;
	}

	//called after IO for zeroing this cluster is started
	void alloc_cluster_zeroing(const qcow2_io_ctx_t &ioc, u64 cluster_offset);

	//called after the cluster is zeroed
	void alloc_cluster_zeroed(const struct ublksrv_queue *q,
			int tag, u64 cluster_offset);

	//called after the cluster is zeroed and associated mapping is updated
	void alloc_cluster_done(const qcow2_io_ctx_t &ioc, u64 cluster_offset);

	//add one waiter on this cluster's allocation progress
	//(NOTE(review): original comment here was copy-pasted from
	//alloc_cluster_done — confirm intent)
	void alloc_cluster_add_waiter(const qcow2_io_ctx_t &ioc,
			u64 cluster_offset);
};
472*94c4a1e1SFrank Piva
/*
 * Qcow2Image - the opened qcow2 image file.
 *
 * 'fd' is owned by this object (ctor/dtor are defined out of line;
 * presumably the dtor closes it — which is why copying must be
 * forbidden: a copy would lead to double close / aliased ownership).
 */
class Qcow2Image {
private:
	std::string fpath;	//image file path
public:
	int fd;			//file descriptor of the opened image
	Qcow2Image(const char *path);
	~Qcow2Image();

	//non-copyable: owns 'fd'
	Qcow2Image(const Qcow2Image &) = delete;
	Qcow2Image &operator=(const Qcow2Image &) = delete;
};
481*94c4a1e1SFrank Piva
//states of one meta flushing cycle, driven by MetaFlushingState
enum qcow2_meta_flush {
	IDLE,
	PREP_WRITE_SLICES, //all slices are added to list for flush
	ZERO_MY_CLUSTER,
	WAIT, //valid only for mapping table, wait for refcount table flushing done
	WRITE_SLICES,
	WRITE_TOP,
	DONE,
};
491*94c4a1e1SFrank Piva
/*
 * MetaFlushingState - one state machine instance (see enum
 * qcow2_meta_flush) driving the flush of either the mapping meta
 * ('mapping' true) or the refcount meta, under one top table.
 */
class MetaFlushingState {
private:
	// for flushing slices depended by current parent_idx, and for
	// handling state of WRITE_SLICE
	//
	//any slices depended by current parent_idx are added to this list,
	//and it is removed after the flushing is done
	//
	//once the list becomes empty, the state is switched to
	//WRITE_TOP.
	std::vector <Qcow2SliceMeta *> slices_to_flush;
	std::vector <Qcow2SliceMeta *> slices_in_flight;
	unsigned state;			//current qcow2_meta_flush state
	int parent_blk_idx;
	int parent_entry_idx;
	bool mapping;			//true for mapping meta, false for refcount

	void del_meta_from_list(std::vector <Qcow2SliceMeta *> &v,
			const Qcow2SliceMeta *t);

	//one __xxx() handler per qcow2_meta_flush state; the _co variants
	//are the coroutine bodies doing the actual IO
	void __prep_write_slice(Qcow2State &qs, const struct ublksrv_queue *q);

	void __zero_my_cluster(Qcow2State &qs, const struct ublksrv_queue *q);
	co_io_job __zero_my_cluster_co(Qcow2State &qs,
			const struct ublksrv_queue *q, struct ublk_io_tgt *io, int tag,
			Qcow2SliceMeta *m);

	void __write_slices(Qcow2State &qs, const struct ublksrv_queue *q);
	co_io_job __write_slice_co(Qcow2State &qs,
			const struct ublksrv_queue *q, Qcow2SliceMeta *m,
			struct ublk_io_tgt *io, int tag);

	void __write_top(Qcow2State &qs, const struct ublksrv_queue *q);
	co_io_job __write_top_co(Qcow2State &qs, const struct ublksrv_queue *q,
			struct ublk_io_tgt *io, int tag);

	void __done(Qcow2State &qs, const struct ublksrv_queue *q);
	bool __need_flush(int queued);
	void mark_no_update();
public:
	Qcow2TopTable &top;		//l1 table or refcount table
	unsigned slice_dirtied;
	std::chrono::system_clock::time_point last_flush;

	unsigned get_state() const {
		return state;
	}
	//set new state, logging the transition
	//NOTE(review): sizes are size_t, so %zu would be the exact format
	void set_state(u32 s) {
		ublk_dbg(UBLK_DBG_QCOW2_FLUSH, "%s: map %d slice_dirtied %u parent_blk_idx %d"
				" parent_entry_idx %d %d->%d to_flush %zd in_flight %zd\n",
				__func__, mapping, slice_dirtied,
				parent_blk_idx, parent_entry_idx, state,
				s, slices_to_flush.size(),
				slices_in_flight.size());
		state = s;
	}

	MetaFlushingState(Qcow2TopTable &t, bool is_mapping);
	void slice_is_done(const Qcow2SliceMeta*);
	void add_slice_to_flush(Qcow2SliceMeta *m);
	void run_flush(Qcow2State &qs, const struct ublksrv_queue *q,
			int top_blk_idx);
	bool need_flush(Qcow2State &qs, int *top_idx, unsigned queued);
	void dump(const char *func, int line) const;
	int calc_refcount_dirty_blk_range(Qcow2State& qs,
			int *refcnt_blk_start, int *refcnt_blk_end);
};
559*94c4a1e1SFrank Piva
560*94c4a1e1SFrank Piva /*
561*94c4a1e1SFrank Piva * For any kind of meta flushing, one tag or io slot is required,
562*94c4a1e1SFrank Piva * so start the meta flushing class with meta tag allocator.
563*94c4a1e1SFrank Piva *
 * Meta data updating is never a foreground task, so if running out
565*94c4a1e1SFrank Piva * of tags, let's wait until one tag is released.
566*94c4a1e1SFrank Piva */
567*94c4a1e1SFrank Piva class Qcow2MetaFlushing {
568*94c4a1e1SFrank Piva private:
569*94c4a1e1SFrank Piva std::vector <bool> tags;
570*94c4a1e1SFrank Piva
571*94c4a1e1SFrank Piva int refcnt_blk_start;
572*94c4a1e1SFrank Piva int refcnt_blk_end;
573*94c4a1e1SFrank Piva
574*94c4a1e1SFrank Piva bool handle_mapping_dependency_start_end(Qcow2State *qs,
575*94c4a1e1SFrank Piva const struct ublksrv_queue *q);
576*94c4a1e1SFrank Piva void handle_mapping_dependency(Qcow2State *qs,
577*94c4a1e1SFrank Piva const struct ublksrv_queue *q);
578*94c4a1e1SFrank Piva public:
579*94c4a1e1SFrank Piva Qcow2State &state;
580*94c4a1e1SFrank Piva
581*94c4a1e1SFrank Piva MetaFlushingState mapping_stat;
582*94c4a1e1SFrank Piva MetaFlushingState refcount_stat;
583*94c4a1e1SFrank Piva
inc_dirtied_slice(bool mapping)584*94c4a1e1SFrank Piva void inc_dirtied_slice(bool mapping) {
585*94c4a1e1SFrank Piva if (mapping)
586*94c4a1e1SFrank Piva mapping_stat.slice_dirtied += 1;
587*94c4a1e1SFrank Piva else
588*94c4a1e1SFrank Piva refcount_stat.slice_dirtied += 1;
589*94c4a1e1SFrank Piva }
590*94c4a1e1SFrank Piva
dec_dirtied_slice(bool mapping)591*94c4a1e1SFrank Piva void dec_dirtied_slice(bool mapping) {
592*94c4a1e1SFrank Piva if (mapping)
593*94c4a1e1SFrank Piva mapping_stat.slice_dirtied -= 1;
594*94c4a1e1SFrank Piva else
595*94c4a1e1SFrank Piva refcount_stat.slice_dirtied -= 1;
596*94c4a1e1SFrank Piva }
597*94c4a1e1SFrank Piva
598*94c4a1e1SFrank Piva Qcow2MetaFlushing(Qcow2State &qs);
599*94c4a1e1SFrank Piva void dump();
600*94c4a1e1SFrank Piva int alloc_tag(const struct ublksrv_queue *q);
601*94c4a1e1SFrank Piva void free_tag(const struct ublksrv_queue *q, int tag);
602*94c4a1e1SFrank Piva void run_flush(const struct ublksrv_queue *q, int queued);
603*94c4a1e1SFrank Piva bool is_flushing();
604*94c4a1e1SFrank Piva };
605*94c4a1e1SFrank Piva
606*94c4a1e1SFrank Piva class Qcow2State {
607*94c4a1e1SFrank Piva private:
608*94c4a1e1SFrank Piva std::vector <Qcow2SliceMeta *> freed_slices;
609*94c4a1e1SFrank Piva public:
610*94c4a1e1SFrank Piva const struct ublksrv_ctrl_dev_info *dev_info;
611*94c4a1e1SFrank Piva unsigned min_bs_bits;
612*94c4a1e1SFrank Piva struct meta_mapping {
613*94c4a1e1SFrank Piva int nr;
614*94c4a1e1SFrank Piva std::vector <Qcow2MappingMeta *> meta;
615*94c4a1e1SFrank Piva };
616*94c4a1e1SFrank Piva typedef std::valarray<struct meta_mapping> MetaArray;
617*94c4a1e1SFrank Piva
618*94c4a1e1SFrank Piva const struct ublksrv_dev *dev;
619*94c4a1e1SFrank Piva Qcow2Image img;
620*94c4a1e1SFrank Piva Qcow2Header header;
621*94c4a1e1SFrank Piva
622*94c4a1e1SFrank Piva /* must be declared after header */
623*94c4a1e1SFrank Piva Qcow2L1Table l1_table;
624*94c4a1e1SFrank Piva
625*94c4a1e1SFrank Piva /* must be declared after header */
626*94c4a1e1SFrank Piva Qcow2RefcountTable refcount_table;
627*94c4a1e1SFrank Piva
628*94c4a1e1SFrank Piva Qcow2ClusterAllocator cluster_allocator;
629*94c4a1e1SFrank Piva Qcow2ClusterMapping cluster_map;
630*94c4a1e1SFrank Piva
631*94c4a1e1SFrank Piva // map meta io object with one per-queue unique ID, which is set
632*94c4a1e1SFrank Piva // in sqe->user_data, so we can retrieve the meta io object by
633*94c4a1e1SFrank Piva // cqe->user_data after the io is done.
634*94c4a1e1SFrank Piva MetaArray meta_io_map;
635*94c4a1e1SFrank Piva
636*94c4a1e1SFrank Piva Qcow2MetaFlushing meta_flushing;
637*94c4a1e1SFrank Piva
638*94c4a1e1SFrank Piva #ifdef DEBUG_QCOW2_META_VALIDATE
639*94c4a1e1SFrank Piva std::unordered_map<u64, u64> cluster_use;
640*94c4a1e1SFrank Piva std::unordered_map<u64, u64> cluster_validate_map;
641*94c4a1e1SFrank Piva #endif
642*94c4a1e1SFrank Piva
643*94c4a1e1SFrank Piva Qcow2State(const char *img_path, const struct ublksrv_dev *dev);
644*94c4a1e1SFrank Piva virtual ~Qcow2State();
645*94c4a1e1SFrank Piva
646*94c4a1e1SFrank Piva virtual u32 get_l1_table_max_size();
647*94c4a1e1SFrank Piva virtual u64 get_l1_table_offset();
648*94c4a1e1SFrank Piva
649*94c4a1e1SFrank Piva virtual u32 get_refcount_table_max_size();
650*94c4a1e1SFrank Piva virtual u32 get_refcount_table_act_size();
651*94c4a1e1SFrank Piva virtual u64 get_refcount_table_offset();
652*94c4a1e1SFrank Piva
get_meta_io(u32 qid,u32 pos)653*94c4a1e1SFrank Piva Qcow2MappingMeta *get_meta_io(u32 qid, u32 pos) {
654*94c4a1e1SFrank Piva return meta_io_map[qid].meta[pos];
655*94c4a1e1SFrank Piva }
656*94c4a1e1SFrank Piva
del_meta_io(u32 qid,u32 pos)657*94c4a1e1SFrank Piva void del_meta_io(u32 qid, u32 pos) {
658*94c4a1e1SFrank Piva meta_io_map[qid].meta[pos] = nullptr;
659*94c4a1e1SFrank Piva meta_io_map[qid].nr--;
660*94c4a1e1SFrank Piva
661*94c4a1e1SFrank Piva if (!meta_io_map[qid].nr)
662*94c4a1e1SFrank Piva meta_io_map[qid].meta.clear();
663*94c4a1e1SFrank Piva }
664*94c4a1e1SFrank Piva
get_dev_size()665*94c4a1e1SFrank Piva u64 get_dev_size() {
666*94c4a1e1SFrank Piva return dev->tgt.dev_size;
667*94c4a1e1SFrank Piva }
668*94c4a1e1SFrank Piva
get_min_flush_unit_bits()669*94c4a1e1SFrank Piva unsigned get_min_flush_unit_bits(){
670*94c4a1e1SFrank Piva return min_bs_bits;
671*94c4a1e1SFrank Piva }
672*94c4a1e1SFrank Piva
add_slice_to_free_list(Qcow2SliceMeta * m)673*94c4a1e1SFrank Piva void add_slice_to_free_list(Qcow2SliceMeta *m) {
674*94c4a1e1SFrank Piva freed_slices.push_back(m);
675*94c4a1e1SFrank Piva }
676*94c4a1e1SFrank Piva
677*94c4a1e1SFrank Piva void kill_slices(const struct ublksrv_queue *q);
678*94c4a1e1SFrank Piva u32 add_meta_io(u32 qid, Qcow2MappingMeta *m);
679*94c4a1e1SFrank Piva void dump_meta();
680*94c4a1e1SFrank Piva void reclaim_slice(Qcow2SliceMeta *m);
681*94c4a1e1SFrank Piva void remove_slice_from_evicted_list(Qcow2SliceMeta *m);
682*94c4a1e1SFrank Piva bool has_dirty_slice();
683*94c4a1e1SFrank Piva u32 get_l2_slices_count();
684*94c4a1e1SFrank Piva void shrink_cache();
685*94c4a1e1SFrank Piva
686*94c4a1e1SFrank Piva #ifdef DEBUG_QCOW2_META_VALIDATE
687*94c4a1e1SFrank Piva void validate_cluster_use(u64 host_off, u64 virt_off, u32 use);
688*94c4a1e1SFrank Piva bool validate_cluster_map(u64 host_off, u64 virt_off);
689*94c4a1e1SFrank Piva #else
validate_cluster_use(u64 host_off,u64 virt_off,u32 use)690*94c4a1e1SFrank Piva void validate_cluster_use(u64 host_off, u64 virt_off, u32 use) {}
validate_cluster_map(u64 host_off,u64 virt_off)691*94c4a1e1SFrank Piva bool validate_cluster_map(u64 host_off, u64 virt_off) { return true;}
692*94c4a1e1SFrank Piva #endif
693*94c4a1e1SFrank Piva };
694*94c4a1e1SFrank Piva
dev_to_qcow2state(const struct ublksrv_dev * dev)695*94c4a1e1SFrank Piva static inline Qcow2State *dev_to_qcow2state(const struct ublksrv_dev *dev)
696*94c4a1e1SFrank Piva {
697*94c4a1e1SFrank Piva return (Qcow2State *)dev->tgt.tgt_data;
698*94c4a1e1SFrank Piva }
699*94c4a1e1SFrank Piva
queue_to_qcow2state(const struct ublksrv_queue * q)700*94c4a1e1SFrank Piva static inline Qcow2State *queue_to_qcow2state(const struct ublksrv_queue *q)
701*94c4a1e1SFrank Piva {
702*94c4a1e1SFrank Piva return (Qcow2State *)q->private_data;
703*94c4a1e1SFrank Piva }
704*94c4a1e1SFrank Piva
705*94c4a1e1SFrank Piva Qcow2State *make_qcow2state(const char *file, struct ublksrv_dev *dev);
706*94c4a1e1SFrank Piva
707*94c4a1e1SFrank Piva class Qcow2StatePlain : public Qcow2State {
708*94c4a1e1SFrank Piva public:
Qcow2StatePlain(const char * img_path,const struct ublksrv_dev * dev)709*94c4a1e1SFrank Piva Qcow2StatePlain(const char *img_path, const struct ublksrv_dev *dev):
710*94c4a1e1SFrank Piva Qcow2State(img_path, dev) {}
711*94c4a1e1SFrank Piva };
712*94c4a1e1SFrank Piva
713*94c4a1e1SFrank Piva class Qcow2StateSnapshot : public Qcow2State {
714*94c4a1e1SFrank Piva public:
Qcow2StateSnapshot(const char * img_path,const struct ublksrv_dev * dev)715*94c4a1e1SFrank Piva Qcow2StateSnapshot(const char *img_path, const struct ublksrv_dev *dev):
716*94c4a1e1SFrank Piva Qcow2State(img_path, dev) {}
717*94c4a1e1SFrank Piva };
718*94c4a1e1SFrank Piva
719*94c4a1e1SFrank Piva class Qcow2StateExternalDataFile : public Qcow2State {
720*94c4a1e1SFrank Piva public:
Qcow2StateExternalDataFile(const char * img_path,const struct ublksrv_dev * dev)721*94c4a1e1SFrank Piva Qcow2StateExternalDataFile(const char *img_path, const struct ublksrv_dev *dev):
722*94c4a1e1SFrank Piva Qcow2State(img_path, dev) {}
723*94c4a1e1SFrank Piva };
724*94c4a1e1SFrank Piva
qcow2_meta_io_done(const struct ublksrv_queue * q,const struct io_uring_cqe * cqe)725*94c4a1e1SFrank Piva static inline int qcow2_meta_io_done(const struct ublksrv_queue *q,
726*94c4a1e1SFrank Piva const struct io_uring_cqe *cqe)
727*94c4a1e1SFrank Piva {
728*94c4a1e1SFrank Piva if (!cqe)
729*94c4a1e1SFrank Piva return -EAGAIN;
730*94c4a1e1SFrank Piva
731*94c4a1e1SFrank Piva int op = user_data_to_op(cqe->user_data);
732*94c4a1e1SFrank Piva int tag = user_data_to_tag(cqe->user_data);
733*94c4a1e1SFrank Piva u32 tgt_data = user_data_to_tgt_data(cqe->user_data);
734*94c4a1e1SFrank Piva
735*94c4a1e1SFrank Piva /* plain IO's tgt_data is zero */
736*94c4a1e1SFrank Piva if (tgt_data == 0) {
737*94c4a1e1SFrank Piva ublk_err( "%s target data is zero for meta io(tag %d op %u %llx)\n",
738*94c4a1e1SFrank Piva __func__, tag, op, cqe->user_data);
739*94c4a1e1SFrank Piva return -EAGAIN;
740*94c4a1e1SFrank Piva }
741*94c4a1e1SFrank Piva
742*94c4a1e1SFrank Piva Qcow2State *qs = queue_to_qcow2state(q);
743*94c4a1e1SFrank Piva /* retrieve meta data from target data part of cqe->user_data */
744*94c4a1e1SFrank Piva Qcow2MappingMeta *meta = qs->get_meta_io(q->q_id, tgt_data - 1);
745*94c4a1e1SFrank Piva
746*94c4a1e1SFrank Piva if (cqe->res < 0)
747*94c4a1e1SFrank Piva ublk_err( "%s: tag %d op %d tgt_data %d meta %p userdata %d\n",
748*94c4a1e1SFrank Piva __func__, tag, user_data_to_op(cqe->user_data),
749*94c4a1e1SFrank Piva tgt_data, meta, cqe->res);
750*94c4a1e1SFrank Piva meta->io_done(*qs, q, cqe);
751*94c4a1e1SFrank Piva
752*94c4a1e1SFrank Piva return -EAGAIN;
753*94c4a1e1SFrank Piva }
754*94c4a1e1SFrank Piva
755*94c4a1e1SFrank Piva #endif
756