1*94c4a1e1SFrank Piva // SPDX-License-Identifier: GPL-2.0
2*94c4a1e1SFrank Piva #include "qcow2.h"
3*94c4a1e1SFrank Piva
MetaFlushingState(Qcow2TopTable & t,bool is_mapping)4*94c4a1e1SFrank Piva MetaFlushingState::MetaFlushingState(Qcow2TopTable &t, bool is_mapping):
5*94c4a1e1SFrank Piva mapping(is_mapping), top(t)
6*94c4a1e1SFrank Piva {
7*94c4a1e1SFrank Piva state = qcow2_meta_flush::IDLE;
8*94c4a1e1SFrank Piva slice_dirtied = 0;
9*94c4a1e1SFrank Piva parent_blk_idx = -1;
10*94c4a1e1SFrank Piva last_flush = std::chrono::system_clock::now();
11*94c4a1e1SFrank Piva }
12*94c4a1e1SFrank Piva
del_meta_from_list(std::vector<Qcow2SliceMeta * > & v,const Qcow2SliceMeta * t)13*94c4a1e1SFrank Piva void MetaFlushingState::del_meta_from_list(std::vector <Qcow2SliceMeta *> &v,
14*94c4a1e1SFrank Piva const Qcow2SliceMeta *t)
15*94c4a1e1SFrank Piva {
16*94c4a1e1SFrank Piva auto it = find(v.cbegin(), v.cend(), t);
17*94c4a1e1SFrank Piva
18*94c4a1e1SFrank Piva qcow2_assert(it != v.cend());
19*94c4a1e1SFrank Piva v.erase(it);
20*94c4a1e1SFrank Piva }
21*94c4a1e1SFrank Piva
slice_is_done(const Qcow2SliceMeta * t)22*94c4a1e1SFrank Piva void MetaFlushingState::slice_is_done(const Qcow2SliceMeta *t)
23*94c4a1e1SFrank Piva {
24*94c4a1e1SFrank Piva del_meta_from_list(slices_in_flight, t);
25*94c4a1e1SFrank Piva
26*94c4a1e1SFrank Piva qcow2_assert(state == WRITE_SLICES);
27*94c4a1e1SFrank Piva
28*94c4a1e1SFrank Piva if (slices_in_flight.empty() && slices_to_flush.empty()) {
29*94c4a1e1SFrank Piva if (++parent_entry_idx >= (512/8))
30*94c4a1e1SFrank Piva set_state(qcow2_meta_flush::WRITE_TOP);
31*94c4a1e1SFrank Piva else
32*94c4a1e1SFrank Piva //handle next entry in this block of top table
33*94c4a1e1SFrank Piva set_state(qcow2_meta_flush::PREP_WRITE_SLICES);
34*94c4a1e1SFrank Piva }
35*94c4a1e1SFrank Piva }
36*94c4a1e1SFrank Piva
add_slice_to_flush(Qcow2SliceMeta * m)37*94c4a1e1SFrank Piva void MetaFlushingState::add_slice_to_flush(Qcow2SliceMeta *m)
38*94c4a1e1SFrank Piva {
39*94c4a1e1SFrank Piva qcow2_assert(state == PREP_WRITE_SLICES);
40*94c4a1e1SFrank Piva qcow2_assert(m->get_dirty(-1));
41*94c4a1e1SFrank Piva
42*94c4a1e1SFrank Piva auto it = find(slices_to_flush.cbegin(), slices_to_flush.cend(), m);
43*94c4a1e1SFrank Piva qcow2_assert(it == slices_to_flush.cend());
44*94c4a1e1SFrank Piva
45*94c4a1e1SFrank Piva auto it1 = find(slices_in_flight.cbegin(), slices_in_flight.cend(), m);
46*94c4a1e1SFrank Piva qcow2_assert(it1 == slices_in_flight.cend());
47*94c4a1e1SFrank Piva
48*94c4a1e1SFrank Piva slices_to_flush.push_back(m);
49*94c4a1e1SFrank Piva }
50*94c4a1e1SFrank Piva
__write_slice_co(Qcow2State & qs,const struct ublksrv_queue * q,Qcow2SliceMeta * m,struct ublk_io_tgt * io,int tag)51*94c4a1e1SFrank Piva co_io_job MetaFlushingState::__write_slice_co(Qcow2State &qs,
52*94c4a1e1SFrank Piva const struct ublksrv_queue *q, Qcow2SliceMeta *m,
53*94c4a1e1SFrank Piva struct ublk_io_tgt *io, int tag)
54*94c4a1e1SFrank Piva {
55*94c4a1e1SFrank Piva int ret;
56*94c4a1e1SFrank Piva qcow2_io_ctx_t ioc(tag, q->q_id);
57*94c4a1e1SFrank Piva bool wait;
58*94c4a1e1SFrank Piva
59*94c4a1e1SFrank Piva slices_in_flight.push_back(m);
60*94c4a1e1SFrank Piva again:
61*94c4a1e1SFrank Piva try {
62*94c4a1e1SFrank Piva ret = m->flush(qs, ioc, m->get_offset(), m->get_buf_size());
63*94c4a1e1SFrank Piva wait = false;
64*94c4a1e1SFrank Piva } catch (MetaUpdateException &meta_update_error) {
65*94c4a1e1SFrank Piva wait = true;
66*94c4a1e1SFrank Piva }
67*94c4a1e1SFrank Piva
68*94c4a1e1SFrank Piva if (wait) {
69*94c4a1e1SFrank Piva co_await__suspend_always(tag);
70*94c4a1e1SFrank Piva goto again;
71*94c4a1e1SFrank Piva }
72*94c4a1e1SFrank Piva
73*94c4a1e1SFrank Piva if (ret < 0) {
74*94c4a1e1SFrank Piva ublk_err( "%s: zero my cluster failed %d\n",
75*94c4a1e1SFrank Piva __func__, ret);
76*94c4a1e1SFrank Piva goto exit;
77*94c4a1e1SFrank Piva }
78*94c4a1e1SFrank Piva
79*94c4a1e1SFrank Piva if (ret > 0) {
80*94c4a1e1SFrank Piva const struct io_uring_cqe *cqe;
81*94c4a1e1SFrank Piva bool done = false;
82*94c4a1e1SFrank Piva int io_ret = 0;
83*94c4a1e1SFrank Piva
84*94c4a1e1SFrank Piva co_await__suspend_always(tag);
85*94c4a1e1SFrank Piva
86*94c4a1e1SFrank Piva cqe = io->tgt_io_cqe;
87*94c4a1e1SFrank Piva done = (cqe && cqe->res != -EAGAIN);
88*94c4a1e1SFrank Piva if (done)
89*94c4a1e1SFrank Piva io_ret = cqe->res;
90*94c4a1e1SFrank Piva ret = qcow2_meta_io_done(q, cqe);
91*94c4a1e1SFrank Piva if (!done && ret == -EAGAIN)
92*94c4a1e1SFrank Piva goto again;
93*94c4a1e1SFrank Piva
94*94c4a1e1SFrank Piva //here we can't retry since the slice may be
95*94c4a1e1SFrank Piva //dirtied just after io_done()
96*94c4a1e1SFrank Piva if (!done) {
97*94c4a1e1SFrank Piva if (ret < 0)
98*94c4a1e1SFrank Piva goto exit;
99*94c4a1e1SFrank Piva } else {
100*94c4a1e1SFrank Piva if (io_ret < 0)
101*94c4a1e1SFrank Piva goto exit;
102*94c4a1e1SFrank Piva ret = io_ret;
103*94c4a1e1SFrank Piva }
104*94c4a1e1SFrank Piva }
105*94c4a1e1SFrank Piva exit:
106*94c4a1e1SFrank Piva if (m->get_prep_flush()) {
107*94c4a1e1SFrank Piva m->set_prep_flush(false);
108*94c4a1e1SFrank Piva m->wakeup_all(q, tag);
109*94c4a1e1SFrank Piva }
110*94c4a1e1SFrank Piva qs.meta_flushing.free_tag(q, tag);
111*94c4a1e1SFrank Piva if (ret >= 0)
112*94c4a1e1SFrank Piva slice_is_done(m);
113*94c4a1e1SFrank Piva else
114*94c4a1e1SFrank Piva del_meta_from_list(slices_in_flight, m);
115*94c4a1e1SFrank Piva m->put_ref();
116*94c4a1e1SFrank Piva }
117*94c4a1e1SFrank Piva
__write_slices(Qcow2State & qs,const struct ublksrv_queue * q)118*94c4a1e1SFrank Piva void MetaFlushingState::__write_slices(Qcow2State &qs,
119*94c4a1e1SFrank Piva const struct ublksrv_queue *q)
120*94c4a1e1SFrank Piva {
121*94c4a1e1SFrank Piva std::vector<Qcow2SliceMeta *> &v1 = slices_to_flush;
122*94c4a1e1SFrank Piva std::vector<Qcow2SliceMeta *>::const_iterator it = v1.cbegin();
123*94c4a1e1SFrank Piva
124*94c4a1e1SFrank Piva flush_log("%s: mapping %d to_flush %d, in_flight %d\n",
125*94c4a1e1SFrank Piva __func__, mapping, v1.size(), slices_in_flight.size());
126*94c4a1e1SFrank Piva
127*94c4a1e1SFrank Piva if (v1.empty())
128*94c4a1e1SFrank Piva return;
129*94c4a1e1SFrank Piva
130*94c4a1e1SFrank Piva while (it != v1.cend()) {
131*94c4a1e1SFrank Piva int tag;
132*94c4a1e1SFrank Piva struct ublk_io_tgt *io;
133*94c4a1e1SFrank Piva Qcow2SliceMeta *m;
134*94c4a1e1SFrank Piva
135*94c4a1e1SFrank Piva tag = qs.meta_flushing.alloc_tag(q);
136*94c4a1e1SFrank Piva if (tag == -1)
137*94c4a1e1SFrank Piva return;
138*94c4a1e1SFrank Piva m = *it;
139*94c4a1e1SFrank Piva it = v1.erase(it);
140*94c4a1e1SFrank Piva m->get_ref();
141*94c4a1e1SFrank Piva io = ublk_get_io_tgt_data(q, tag);
142*94c4a1e1SFrank Piva io->co = __write_slice_co(qs, q, m, io, tag);
143*94c4a1e1SFrank Piva }
144*94c4a1e1SFrank Piva }
145*94c4a1e1SFrank Piva
146*94c4a1e1SFrank Piva //todo: run fsync before flushing top table, and global fsync should be
147*94c4a1e1SFrank Piva //fine, given top table seldom becomes dirty
__write_top_co(Qcow2State & qs,const struct ublksrv_queue * q,struct ublk_io_tgt * io,int tag)148*94c4a1e1SFrank Piva co_io_job MetaFlushingState::__write_top_co(Qcow2State &qs,
149*94c4a1e1SFrank Piva const struct ublksrv_queue *q, struct ublk_io_tgt *io, int tag)
150*94c4a1e1SFrank Piva {
151*94c4a1e1SFrank Piva int ret;
152*94c4a1e1SFrank Piva qcow2_io_ctx_t ioc(tag, q->q_id);
153*94c4a1e1SFrank Piva bool wait;
154*94c4a1e1SFrank Piva
155*94c4a1e1SFrank Piva again:
156*94c4a1e1SFrank Piva try {
157*94c4a1e1SFrank Piva ret = top.flush(qs, ioc,
158*94c4a1e1SFrank Piva top.get_offset() + parent_blk_idx * 512, 512);
159*94c4a1e1SFrank Piva wait = false;
160*94c4a1e1SFrank Piva } catch (MetaUpdateException &meta_update_error) {
161*94c4a1e1SFrank Piva wait = true;
162*94c4a1e1SFrank Piva }
163*94c4a1e1SFrank Piva
164*94c4a1e1SFrank Piva if (wait) {
165*94c4a1e1SFrank Piva co_await__suspend_always(tag);
166*94c4a1e1SFrank Piva goto again;
167*94c4a1e1SFrank Piva }
168*94c4a1e1SFrank Piva
169*94c4a1e1SFrank Piva if (ret < 0) {
170*94c4a1e1SFrank Piva ublk_err( "%s: zero my cluster failed %d\n",
171*94c4a1e1SFrank Piva __func__, ret);
172*94c4a1e1SFrank Piva goto exit;
173*94c4a1e1SFrank Piva }
174*94c4a1e1SFrank Piva
175*94c4a1e1SFrank Piva if (ret > 0) {
176*94c4a1e1SFrank Piva const struct io_uring_cqe *cqe;
177*94c4a1e1SFrank Piva
178*94c4a1e1SFrank Piva co_await__suspend_always(tag);
179*94c4a1e1SFrank Piva
180*94c4a1e1SFrank Piva cqe = io->tgt_io_cqe;
181*94c4a1e1SFrank Piva ret = qcow2_meta_io_done(q, cqe);
182*94c4a1e1SFrank Piva if (ret == -EAGAIN)
183*94c4a1e1SFrank Piva goto again;
184*94c4a1e1SFrank Piva if (ret < 0)
185*94c4a1e1SFrank Piva goto exit;
186*94c4a1e1SFrank Piva }
187*94c4a1e1SFrank Piva exit:
188*94c4a1e1SFrank Piva qs.meta_flushing.free_tag(q, tag);
189*94c4a1e1SFrank Piva
190*94c4a1e1SFrank Piva if (!top.get_blk_dirty(parent_blk_idx))
191*94c4a1e1SFrank Piva set_state(qcow2_meta_flush::DONE);
192*94c4a1e1SFrank Piva }
193*94c4a1e1SFrank Piva
__write_top(Qcow2State & qs,const struct ublksrv_queue * q)194*94c4a1e1SFrank Piva void MetaFlushingState::__write_top(Qcow2State &qs,
195*94c4a1e1SFrank Piva const struct ublksrv_queue *q)
196*94c4a1e1SFrank Piva {
197*94c4a1e1SFrank Piva int tag;
198*94c4a1e1SFrank Piva struct ublk_io_tgt *io;
199*94c4a1e1SFrank Piva
200*94c4a1e1SFrank Piva if (top.is_flushing(parent_blk_idx))
201*94c4a1e1SFrank Piva return;
202*94c4a1e1SFrank Piva
203*94c4a1e1SFrank Piva tag = qs.meta_flushing.alloc_tag(q);
204*94c4a1e1SFrank Piva if (tag == -1)
205*94c4a1e1SFrank Piva return;
206*94c4a1e1SFrank Piva
207*94c4a1e1SFrank Piva io = ublk_get_io_tgt_data(q, tag);
208*94c4a1e1SFrank Piva io->co = __write_top_co(qs, q, io, tag);
209*94c4a1e1SFrank Piva }
210*94c4a1e1SFrank Piva
__done(Qcow2State & qs,const struct ublksrv_queue * q)211*94c4a1e1SFrank Piva void MetaFlushingState::__done(Qcow2State &qs, const struct ublksrv_queue *q)
212*94c4a1e1SFrank Piva {
213*94c4a1e1SFrank Piva set_state(qcow2_meta_flush::IDLE);
214*94c4a1e1SFrank Piva last_flush = std::chrono::system_clock::now();
215*94c4a1e1SFrank Piva }
216*94c4a1e1SFrank Piva
mark_no_update()217*94c4a1e1SFrank Piva void MetaFlushingState::mark_no_update()
218*94c4a1e1SFrank Piva {
219*94c4a1e1SFrank Piva auto it = slices_to_flush.begin();
220*94c4a1e1SFrank Piva
221*94c4a1e1SFrank Piva for (; it != slices_to_flush.end(); it++)
222*94c4a1e1SFrank Piva (*it)->set_prep_flush(true);
223*94c4a1e1SFrank Piva }
224*94c4a1e1SFrank Piva
/*
 * Find the next entry of the current top-table block that has dirty
 * cached slices, queue all of those slices for write-back, and move to
 * ZERO_MY_CLUSTER; if no entry in the block has dirty slices, move to
 * WRITE_TOP instead.
 */
void MetaFlushingState::__prep_write_slice(Qcow2State &qs,
		const struct ublksrv_queue *q)
{
	u64 entry;
	u64 idx = -1;
	u64 start, end, offset, step;

	//scan entries starting from parent_entry_idx; 512/8 == 64
	//8-byte entries per 512-byte top-table block
	do {
		qcow2_assert(parent_entry_idx >= 0 && parent_entry_idx < (512/8));

		idx = (parent_blk_idx * 512 / 8) + parent_entry_idx;

		//NOTE(review): idx is u64, so 'idx >= 0' is trivially true
		qcow2_assert(idx >= 0 && idx < top.get_nr_entries());

		entry = top.get_entry(idx);
		if (entry && top.has_dirty_slices(qs, idx))
			break;

		//this entry has nothing dirty; try the next one, or flush
		//the top table once the whole block has been scanned
		if (++parent_entry_idx == (512/8)) {
			parent_entry_idx = 0;
			set_state(qcow2_meta_flush::WRITE_TOP);
			return;
		}
	} while (true);

	//step: virtual range covered by one cached slice (l2-table slice
	//for mapping, refcount-blk slice otherwise)
	if (mapping)
		step = 1ULL << (QCOW2_PARA::L2_TABLE_SLICE_BITS - 3 +
				qs.header.cluster_bits);
	else
		step = 1ULL << (QCOW2_PARA::REFCOUNT_BLK_SLICE_BITS - 3 +
				qs.header.cluster_bits);

	//walk the whole range covered by this top-table entry and queue
	//every cached slice that is dirty
	start = idx << top.single_entry_order();
	end = start + (1ULL << top.single_entry_order());
	for (offset = start; offset < end; offset += step) {
		Qcow2SliceMeta *t;

		if (mapping)
			t = qs.cluster_map.__find_slice(offset);
		else
			t = qs.cluster_allocator.__find_slice(offset);

		if (t && t->get_dirty(-1)) {
			qcow2_assert(!t->is_flushing());
			add_slice_to_flush(t);
		}
	}

	if (slices_to_flush.size() > 0)
		set_state(qcow2_meta_flush::ZERO_MY_CLUSTER);
	else
		set_state(qcow2_meta_flush::WRITE_TOP);
}
278*94c4a1e1SFrank Piva
/*
 * Coroutine that zeroes the host cluster backing slice 'm' (via
 * Qcow2SliceMeta::zero_my_cluster).  Retries while the meta is busy
 * (MetaUpdateException) or the io returns -EAGAIN.  Once the cluster
 * is known to be zeroed, the mapping-table flush enters WAIT (its
 * refcount dependency must be flushed first), while the refcount
 * flush proceeds straight to WRITE_SLICES.
 */
co_io_job MetaFlushingState::__zero_my_cluster_co(Qcow2State &qs,
		const struct ublksrv_queue *q, struct ublk_io_tgt *io, int tag,
		Qcow2SliceMeta *m)

{
	int ret;
	qcow2_io_ctx_t ioc(tag, q->q_id);
	//host cluster containing this slice, aligned down to cluster size
	u64 cluster_off = m->get_offset() &
		~((1ULL << qs.header.cluster_bits) - 1);
	bool wait;

again:
	try {
		ret = m->zero_my_cluster(qs, ioc);
		wait = false;
	} catch (MetaUpdateException &meta_update_error) {
		//meta is being updated; suspend and retry
		wait = true;
	}

	if (wait) {
		co_await__suspend_always(tag);
		goto again;
	}

	if (ret < 0) {
		ublk_err( "%s: zero my cluster failed %d\n",
				__func__, ret);
		goto exit;
	}

	//ret > 0 means the zeroing io was submitted; wait for its cqe
	if (ret > 0) {
		const struct io_uring_cqe *cqe;

		co_await__suspend_always(tag);

		cqe = io->tgt_io_cqe;
		ret = qcow2_meta_io_done(q, cqe);
		if (ret == -EAGAIN)
			goto again;
		if (ret < 0)
			goto exit;
	}
exit:
	qs.meta_flushing.free_tag(q, tag);
	if (qs.cluster_allocator.alloc_cluster_is_zeroed(cluster_off)) {
		//for mapping table, wait until the associated refcount
		//tables are flushed out
		if (mapping) {
			mark_no_update();
			set_state(qcow2_meta_flush::WAIT);
		} else
			set_state(qcow2_meta_flush::WRITE_SLICES);
	}
	m->put_ref();
}
334*94c4a1e1SFrank Piva
335*94c4a1e1SFrank Piva
__zero_my_cluster(Qcow2State & qs,const struct ublksrv_queue * q)336*94c4a1e1SFrank Piva void MetaFlushingState::__zero_my_cluster(Qcow2State &qs,
337*94c4a1e1SFrank Piva const struct ublksrv_queue *q)
338*94c4a1e1SFrank Piva {
339*94c4a1e1SFrank Piva int tag;
340*94c4a1e1SFrank Piva struct ublk_io_tgt *io;
341*94c4a1e1SFrank Piva Qcow2SliceMeta *m = slices_to_flush[0];
342*94c4a1e1SFrank Piva u64 cluster_off = m->get_offset() &
343*94c4a1e1SFrank Piva ~((1ULL << qs.header.cluster_bits) - 1);
344*94c4a1e1SFrank Piva Qcow2ClusterState *s =
345*94c4a1e1SFrank Piva qs.cluster_allocator.get_cluster_state(cluster_off);
346*94c4a1e1SFrank Piva
347*94c4a1e1SFrank Piva if (s != nullptr && s->get_state() == QCOW2_ALLOC_ZEROING)
348*94c4a1e1SFrank Piva return;
349*94c4a1e1SFrank Piva
350*94c4a1e1SFrank Piva tag = qs.meta_flushing.alloc_tag(q);
351*94c4a1e1SFrank Piva if (tag == -1)
352*94c4a1e1SFrank Piva return;
353*94c4a1e1SFrank Piva
354*94c4a1e1SFrank Piva m->get_ref();
355*94c4a1e1SFrank Piva io = ublk_get_io_tgt_data(q, tag);
356*94c4a1e1SFrank Piva io->co = __zero_my_cluster_co(qs, q, io, tag, m);
357*94c4a1e1SFrank Piva }
358*94c4a1e1SFrank Piva
/*
 * Advance this table's flushing state machine as far as possible
 * without blocking.  When IDLE, a valid top_blk_idx (>= 0) selects
 * which dirty block of the top table to start flushing.
 *
 * Flow as implemented by the helpers:
 *   PREP_WRITE_SLICES -> ZERO_MY_CLUSTER | WRITE_TOP
 *   ZERO_MY_CLUSTER   -> WAIT (mapping only) | WRITE_SLICES
 *   WRITE_SLICES      -> PREP_WRITE_SLICES | WRITE_TOP
 *   WRITE_TOP         -> DONE -> IDLE
 */
void MetaFlushingState::run_flush(Qcow2State &qs,
		const struct ublksrv_queue *q, int top_blk_idx)
{
	if (state == qcow2_meta_flush::IDLE) {
		if (top_blk_idx >= 0 && top_blk_idx < top.dirty_blk_size()) {
			parent_blk_idx = top_blk_idx;
			parent_entry_idx = 0;
			set_state(qcow2_meta_flush::PREP_WRITE_SLICES);
		}
	}
again:
	if (state == qcow2_meta_flush::PREP_WRITE_SLICES)
		__prep_write_slice(qs, q);

	if (state == qcow2_meta_flush::ZERO_MY_CLUSTER)
		__zero_my_cluster(qs, q);

	//mapping flush must wait for its refcount dependency, driven by
	//Qcow2MetaFlushing::handle_mapping_dependency()
	if (state == qcow2_meta_flush::WAIT) {
		qcow2_assert(mapping);
		return;
	}

	if (state == qcow2_meta_flush::WRITE_SLICES)
		__write_slices(qs, q);

	if (state == qcow2_meta_flush::WRITE_TOP)
		__write_top(qs, q);

	if (state == qcow2_meta_flush::DONE)
		__done(qs, q);

	//slice completion may have moved us back to PREP_WRITE_SLICES
	//for the next entry of this top-table block; keep going
	if (state == qcow2_meta_flush::PREP_WRITE_SLICES)
		goto again;
}
393*94c4a1e1SFrank Piva
dump(const char * func,int line) const394*94c4a1e1SFrank Piva void MetaFlushingState::dump(const char *func, int line) const {
395*94c4a1e1SFrank Piva qcow2_log("%s %d: mapping %d state %d blk_idx %d entry_idx %d list size(%ld %ld)"
396*94c4a1e1SFrank Piva " dirty slices %u, top table dirty blocks %u\n",
397*94c4a1e1SFrank Piva func, line, mapping, state,
398*94c4a1e1SFrank Piva parent_blk_idx, parent_entry_idx,
399*94c4a1e1SFrank Piva slices_to_flush.size(),
400*94c4a1e1SFrank Piva slices_in_flight.size(),
401*94c4a1e1SFrank Piva slice_dirtied, top.dirty_blks());
402*94c4a1e1SFrank Piva }
403*94c4a1e1SFrank Piva
__need_flush(int queued)404*94c4a1e1SFrank Piva bool MetaFlushingState::__need_flush(int queued)
405*94c4a1e1SFrank Piva {
406*94c4a1e1SFrank Piva bool need_flush = slice_dirtied > 0;
407*94c4a1e1SFrank Piva
408*94c4a1e1SFrank Piva if (!need_flush)
409*94c4a1e1SFrank Piva need_flush = top.dirty_blks() > 0;
410*94c4a1e1SFrank Piva
411*94c4a1e1SFrank Piva if (!need_flush)
412*94c4a1e1SFrank Piva return false;
413*94c4a1e1SFrank Piva
414*94c4a1e1SFrank Piva if (queued) {
415*94c4a1e1SFrank Piva auto diff = std::chrono::system_clock::now() - last_flush;
416*94c4a1e1SFrank Piva std::chrono::milliseconds ms = std::chrono::duration_cast<
417*94c4a1e1SFrank Piva std::chrono::milliseconds>(diff);
418*94c4a1e1SFrank Piva
419*94c4a1e1SFrank Piva //timeout, so flush now
420*94c4a1e1SFrank Piva if (ms.count() > MAX_META_FLUSH_DELAY_MS)
421*94c4a1e1SFrank Piva return true;
422*94c4a1e1SFrank Piva else
423*94c4a1e1SFrank Piva return false;
424*94c4a1e1SFrank Piva }
425*94c4a1e1SFrank Piva
426*94c4a1e1SFrank Piva /* queue is idle, so have to flush immediately */
427*94c4a1e1SFrank Piva return true;
428*94c4a1e1SFrank Piva }
429*94c4a1e1SFrank Piva
need_flush(Qcow2State & qs,int * top_idx,unsigned queued)430*94c4a1e1SFrank Piva bool MetaFlushingState::need_flush(Qcow2State &qs, int *top_idx,
431*94c4a1e1SFrank Piva unsigned queued)
432*94c4a1e1SFrank Piva {
433*94c4a1e1SFrank Piva bool need_flush = get_state() > qcow2_meta_flush::IDLE;
434*94c4a1e1SFrank Piva int idx = -1;
435*94c4a1e1SFrank Piva
436*94c4a1e1SFrank Piva if (!need_flush) {
437*94c4a1e1SFrank Piva if (mapping)
438*94c4a1e1SFrank Piva need_flush = qs.cluster_map.
439*94c4a1e1SFrank Piva has_evicted_dirty_slices();
440*94c4a1e1SFrank Piva else
441*94c4a1e1SFrank Piva need_flush = qs.cluster_allocator.
442*94c4a1e1SFrank Piva has_evicted_dirty_slices();
443*94c4a1e1SFrank Piva
444*94c4a1e1SFrank Piva //only flush refcount tables actively if there
445*94c4a1e1SFrank Piva //are evicted dirty refcount slices
446*94c4a1e1SFrank Piva if (!need_flush)
447*94c4a1e1SFrank Piva need_flush = __need_flush(queued);
448*94c4a1e1SFrank Piva }
449*94c4a1e1SFrank Piva
450*94c4a1e1SFrank Piva if (need_flush && get_state() == qcow2_meta_flush::IDLE) {
451*94c4a1e1SFrank Piva if (mapping)
452*94c4a1e1SFrank Piva idx = qs.cluster_map.figure_group_from_l1_table();
453*94c4a1e1SFrank Piva else
454*94c4a1e1SFrank Piva idx = qs.cluster_allocator.figure_group_from_refcount_table();
455*94c4a1e1SFrank Piva
456*94c4a1e1SFrank Piva //idx is more accurate than slice_dirtied
457*94c4a1e1SFrank Piva //FIXME: make slice_dirtied more accurate
458*94c4a1e1SFrank Piva if (idx == -1) {
459*94c4a1e1SFrank Piva need_flush = false;
460*94c4a1e1SFrank Piva slice_dirtied = 0;
461*94c4a1e1SFrank Piva }
462*94c4a1e1SFrank Piva }
463*94c4a1e1SFrank Piva
464*94c4a1e1SFrank Piva *top_idx = idx;
465*94c4a1e1SFrank Piva return need_flush;
466*94c4a1e1SFrank Piva }
467*94c4a1e1SFrank Piva
468*94c4a1e1SFrank Piva //calculate the 1st index of refcount table, in which the to-be-flushed
469*94c4a1e1SFrank Piva //l2's entries depend on
/*
 * Compute the window [*refcnt_blk_start, *refcnt_blk_end) of refcount
 * table blocks that must be flushed before the queued l2 slices can be
 * written out.  Returns -EINVAL when no queued slice has a valid dirty
 * range.  Only meaningful for the mapping-table instance.
 */
int MetaFlushingState::calc_refcount_dirty_blk_range(Qcow2State& qs,
		int *refcnt_blk_start, int *refcnt_blk_end)
{
	u64 s = (u64)-1;
	u64 e = 0;
	u64 l2_offset = 0;
	int start_idx, end_idx;

	qcow2_assert(mapping);

	//[s, e] := union of the dirty byte ranges of every queued slice
	for (auto it = slices_to_flush.begin(); it != slices_to_flush.end();
			it++) {
		u64 ts, te;

		qcow2_assert((*it)->get_dirty(-1));

		(*it)->get_dirty_range(&ts, &te);

		//remember the cluster holding the first slice; its own
		//refcount has to be covered as well (see below)
		if (!l2_offset)
			l2_offset = (*it)->get_offset() & ~((1ULL <<
					qs.header.cluster_bits) - 1);

		//ts > te denotes an empty dirty range for this slice
		if (ts > te)
			continue;
		if (ts < s)
			s = ts;
		if (te > e)
			e = te;
	}

	//no slice contributed a valid range
	if (s > e)
		return -EINVAL;

	//this l2 should be considered too
	if (l2_offset && l2_offset < s)
		s = l2_offset;

	//translate byte offsets to refcount-table entry indexes, then to
	//flush-unit block indexes; end is made exclusive by the +1
	start_idx = qs.refcount_table.offset_to_idx(s);
	*refcnt_blk_start = start_idx >> (qs.get_min_flush_unit_bits() - 3);

	end_idx = qs.refcount_table.offset_to_idx(e);
	*refcnt_blk_end = end_idx >> (qs.get_min_flush_unit_bits() - 3);
	*refcnt_blk_end += 1;

	flush_log("%s: %lx-%lx idx (%d %d) blk idx(%d %d)\n", __func__, s, e,
			start_idx, end_idx, *refcnt_blk_start, *refcnt_blk_end);

	//guarantee a non-empty window
	if (*refcnt_blk_start == *refcnt_blk_end)
		*refcnt_blk_end = *refcnt_blk_start + 1;

	if (*refcnt_blk_start >= *refcnt_blk_end)
		qcow2_log("%s: %lx-%lx bad idx %d %d\n", __func__, s, e,
				*refcnt_blk_start, *refcnt_blk_end);

	qcow2_assert(*refcnt_blk_start < *refcnt_blk_end);

	return 0;
}
528*94c4a1e1SFrank Piva
Qcow2MetaFlushing(Qcow2State & qs)529*94c4a1e1SFrank Piva Qcow2MetaFlushing::Qcow2MetaFlushing(Qcow2State &qs):
530*94c4a1e1SFrank Piva tags(QCOW2_PARA::META_MAX_TAGS),
531*94c4a1e1SFrank Piva refcnt_blk_start(-1),
532*94c4a1e1SFrank Piva refcnt_blk_end(-1),
533*94c4a1e1SFrank Piva state(qs),
534*94c4a1e1SFrank Piva mapping_stat(qs.l1_table, true),
535*94c4a1e1SFrank Piva refcount_stat(qs.refcount_table, false)
536*94c4a1e1SFrank Piva {
537*94c4a1e1SFrank Piva for (int i = 0; i < tags.size(); i++)
538*94c4a1e1SFrank Piva tags[i] = true;
539*94c4a1e1SFrank Piva }
540*94c4a1e1SFrank Piva
alloc_tag(const struct ublksrv_queue * q)541*94c4a1e1SFrank Piva int Qcow2MetaFlushing::alloc_tag(const struct ublksrv_queue *q) {
542*94c4a1e1SFrank Piva for (size_t i = 0; i < tags.size(); i++) {
543*94c4a1e1SFrank Piva if (tags[i]) {
544*94c4a1e1SFrank Piva tags[i] = false;
545*94c4a1e1SFrank Piva return i + q->q_depth;
546*94c4a1e1SFrank Piva }
547*94c4a1e1SFrank Piva }
548*94c4a1e1SFrank Piva return -1;
549*94c4a1e1SFrank Piva }
550*94c4a1e1SFrank Piva
free_tag(const struct ublksrv_queue * q,int tag)551*94c4a1e1SFrank Piva void Qcow2MetaFlushing::free_tag(const struct ublksrv_queue *q, int tag) {
552*94c4a1e1SFrank Piva int depth = q->q_depth;
553*94c4a1e1SFrank Piva
554*94c4a1e1SFrank Piva qcow2_assert(tag >= depth && tag < depth + tags.size());
555*94c4a1e1SFrank Piva tags[tag - depth] = true;
556*94c4a1e1SFrank Piva }
557*94c4a1e1SFrank Piva
//log the dirty-meta counters of both state machines for debugging
void Qcow2MetaFlushing::dump()
{
	ublk_err( "meta flushing: mapping: dirty slices %u, l1 dirty blocks %u\n",
			mapping_stat.slice_dirtied,
			state.l1_table.dirty_blks());
	ublk_err( "meta flushing: refcount: dirty slices %u, refcount table dirty blocks %u\n",
			refcount_stat.slice_dirtied,
			state.refcount_table.dirty_blks());
}
567*94c4a1e1SFrank Piva
/*
 * Handle the boundaries of the refcount dependency window
 * [refcnt_blk_start, refcnt_blk_end) while the mapping flush is in
 * WAIT state:
 *
 * - start == end with start >= 0: the whole window has been flushed,
 *   reset it and resume the mapping flush in WRITE_SLICES
 *
 * - start == end == -1: window not computed yet; derive it from the
 *   queued l2 slices, and resume the mapping flush immediately when
 *   there is nothing to depend on (calc returns < 0)
 *
 * Returns true when the mapping flush was resumed.
 */
bool Qcow2MetaFlushing::handle_mapping_dependency_start_end(Qcow2State *qs,
		const struct ublksrv_queue *q)
{
	if (refcount_stat.get_state() == qcow2_meta_flush::IDLE &&
			(refcnt_blk_start == refcnt_blk_end)) {
		int ret;

		//current flushing refcnt is done
		if (refcnt_blk_start >= 0) {
			mapping_stat.set_state(
					qcow2_meta_flush::WRITE_SLICES);
			refcnt_blk_start = refcnt_blk_end = -1;
			mapping_stat.run_flush(state, q, -1);

			return true;
		} else { //current flushing is just started
			ret = mapping_stat.calc_refcount_dirty_blk_range(
					*qs, &refcnt_blk_start, &refcnt_blk_end);

			//no valid dependency range, nothing to wait for
			if (ret < 0) {
				mapping_stat.set_state(
						qcow2_meta_flush::WRITE_SLICES);
				mapping_stat.run_flush(state, q, -1);
				return true;
			}
		}
	}

	return false;
}
598*94c4a1e1SFrank Piva
/*
 * Drive the refcount flushing that the queued l2 slices depend on,
 * one block of [refcnt_blk_start, refcnt_blk_end) at a time, while the
 * mapping flush sits in WAIT state; resume the mapping flush as soon
 * as the dependency is satisfied.
 */
void Qcow2MetaFlushing::handle_mapping_dependency(Qcow2State *qs,
		const struct ublksrv_queue *q)
{
	qcow2_assert(mapping_stat.get_state() == qcow2_meta_flush::WAIT);

	if (!handle_mapping_dependency_start_end(qs, q)) {

		refcount_stat.run_flush(state, q, refcnt_blk_start);

		//step over blocks whose flush completes (or does nothing)
		//synchronously, i.e. leaves the refcount machine IDLE
		while (refcount_stat.get_state() == qcow2_meta_flush::IDLE &&
				(++refcnt_blk_start < refcnt_blk_end))
			refcount_stat.run_flush(state, q, refcnt_blk_start);
		handle_mapping_dependency_start_end(qs, q);
	}

	if (mapping_stat.get_state() != qcow2_meta_flush::WAIT)
		mapping_stat.run_flush(state, q, -1);
}
617*94c4a1e1SFrank Piva
is_flushing()618*94c4a1e1SFrank Piva bool Qcow2MetaFlushing::is_flushing()
619*94c4a1e1SFrank Piva {
620*94c4a1e1SFrank Piva return mapping_stat.get_state() != qcow2_meta_flush::IDLE ||
621*94c4a1e1SFrank Piva refcount_stat.get_state() != qcow2_meta_flush::IDLE;
622*94c4a1e1SFrank Piva }
623*94c4a1e1SFrank Piva
/*
 * Top-level flush driver: query both state machines for pending dirty
 * meta, kick the mapping flush first, and run the refcount dependency
 * handling whenever the mapping flush is blocked in WAIT.  'queued'
 * indicates whether the queue still has io queued, which delays
 * flushing (see __need_flush()).
 */
void Qcow2MetaFlushing::run_flush(const struct ublksrv_queue *q, int queued)
{
	Qcow2State *qs = queue_to_qcow2state(q);
	bool need_flush;
	int map_idx = -1;
	int refcnt_idx = -1;

	need_flush = mapping_stat.need_flush(*qs, &map_idx, queued);
	need_flush |= refcount_stat.need_flush(*qs, &refcnt_idx, queued);

	if (need_flush)
		flush_log("%s: enter flush: state %d/%d top blk idx %d/%d queued %d, refcnt blks(%d %d)\n",
				__func__, mapping_stat.get_state(),
				refcount_stat.get_state(), map_idx, refcnt_idx,
				queued, refcnt_blk_start, refcnt_blk_end);

	//refcount tables flushing is always triggered by flushing mapping
	//tables
	if (need_flush)
		mapping_stat.run_flush(state, q, map_idx);

	if (mapping_stat.get_state() == qcow2_meta_flush::WAIT)
		handle_mapping_dependency(qs, q);

	if (need_flush)
		flush_log("%s: exit flush: state %d/%d queued %d refcnt blks(%d %d) has dirty slice %d\n",
				__func__, mapping_stat.get_state(),
				refcount_stat.get_state(), queued,
				refcnt_blk_start, refcnt_blk_end,
				qs->has_dirty_slice());
}
655