// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2012 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-bio-prison-v1.h"
#include "dm-bio-prison-v2.h"

#include <linux/spinlock.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/slab.h>

/*----------------------------------------------------------------*/

#define MIN_CELLS 1024

/*
 * The key space is divided into regions, each protected by its own lock,
 * to reduce contention on the prison.  The per-region padding keeps each
 * lock in its own cacheline.
 */
struct prison_region {
        spinlock_t lock;
        struct rb_root cell;
} ____cacheline_aligned_in_smp;

struct dm_bio_prison {
        mempool_t cell_pool;
        unsigned int num_locks;
        struct prison_region regions[] __counted_by(num_locks);
};

static struct kmem_cache *_cell_cache;

/*----------------------------------------------------------------*/

/*
 * The mempool is sized by MIN_CELLS: the number of cells expected to be
 * in use _concurrently_.  Don't confuse it with the number of distinct
 * keys.
 */
struct dm_bio_prison *dm_bio_prison_create(void)
{
        int ret;
        unsigned int i, num_locks;
        struct dm_bio_prison *prison;

        num_locks = dm_num_hash_locks();
        prison = kzalloc(struct_size(prison, regions, num_locks), GFP_KERNEL);
        if (!prison)
                return NULL;
        prison->num_locks = num_locks;

        for (i = 0; i < prison->num_locks; i++) {
                spin_lock_init(&prison->regions[i].lock);
                prison->regions[i].cell = RB_ROOT;
        }

        ret = mempool_init_slab_pool(&prison->cell_pool, MIN_CELLS, _cell_cache);
        if (ret) {
                kfree(prison);
                return NULL;
        }

        return prison;
}
EXPORT_SYMBOL_GPL(dm_bio_prison_create);
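
/*
 * A minimal usage sketch (illustrative only, not code from an in-tree
 * target): a target creates one prison at construction time and destroys
 * it on teardown.  The error-handling context shown is an assumption:
 *
 *	struct dm_bio_prison *prison = dm_bio_prison_create();
 *
 *	if (!prison)
 *		return -ENOMEM;
 *	...
 *	dm_bio_prison_destroy(prison);
 */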

void dm_bio_prison_destroy(struct dm_bio_prison *prison)
{
        mempool_exit(&prison->cell_pool);
        kfree(prison);
}
EXPORT_SYMBOL_GPL(dm_bio_prison_destroy);

struct dm_bio_prison_cell *dm_bio_prison_alloc_cell(struct dm_bio_prison *prison, gfp_t gfp)
{
        return mempool_alloc(&prison->cell_pool, gfp);
}
EXPORT_SYMBOL_GPL(dm_bio_prison_alloc_cell);

void dm_bio_prison_free_cell(struct dm_bio_prison *prison,
                             struct dm_bio_prison_cell *cell)
{
        mempool_free(cell, &prison->cell_pool);
}
EXPORT_SYMBOL_GPL(dm_bio_prison_free_cell);

static void __setup_new_cell(struct dm_cell_key *key,
                             struct bio *holder,
                             struct dm_bio_prison_cell *cell)
{
        memcpy(&cell->key, key, sizeof(cell->key));
        cell->holder = holder;
        bio_list_init(&cell->bios);
}

/*
 * Keys are ordered by virtual flag, then device, then block range.
 * Overlapping ranges compare equal, so a lookup finds any cell whose
 * range intersects the key being searched for.
 */
static int cmp_keys(struct dm_cell_key *lhs,
                    struct dm_cell_key *rhs)
{
        if (lhs->virtual < rhs->virtual)
                return -1;

        if (lhs->virtual > rhs->virtual)
                return 1;

        if (lhs->dev < rhs->dev)
                return -1;

        if (lhs->dev > rhs->dev)
                return 1;

        if (lhs->block_end <= rhs->block_begin)
                return -1;

        if (lhs->block_begin >= rhs->block_end)
                return 1;

        return 0;
}

/*
 * Pick the lock protecting the region that key->block_begin falls in.
 */
static inline unsigned int lock_nr(struct dm_cell_key *key, unsigned int num_locks)
{
        return dm_hash_locks_index((key->block_begin >> BIO_PRISON_MAX_RANGE_SHIFT),
                                   num_locks);
}

/*
 * A key is only valid if its range is no larger than BIO_PRISON_MAX_RANGE
 * and does not straddle a region boundary, so a single region lock
 * always covers it.
 */
bool dm_cell_key_has_valid_range(struct dm_cell_key *key)
{
        if (WARN_ON_ONCE(key->block_end - key->block_begin > BIO_PRISON_MAX_RANGE))
                return false;
        if (WARN_ON_ONCE((key->block_begin >> BIO_PRISON_MAX_RANGE_SHIFT) !=
                         (key->block_end - 1) >> BIO_PRISON_MAX_RANGE_SHIFT))
                return false;

        return true;
}
EXPORT_SYMBOL(dm_cell_key_has_valid_range);
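
/*
 * For illustration, assuming BIO_PRISON_MAX_RANGE_SHIFT of 20 (a region
 * of 1M blocks; the value is an assumption here, it is defined in the
 * header): a key covering blocks [100, 200) is valid, whereas one
 * covering [1048570, 1048580) straddles the first region boundary and is
 * rejected even though it spans only ten blocks.
 */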

static int __bio_detain(struct rb_root *root,
                        struct dm_cell_key *key,
                        struct bio *inmate,
                        struct dm_bio_prison_cell *cell_prealloc,
                        struct dm_bio_prison_cell **cell_result)
{
        int r;
        struct rb_node **new = &root->rb_node, *parent = NULL;

        while (*new) {
                struct dm_bio_prison_cell *cell =
                        rb_entry(*new, struct dm_bio_prison_cell, node);

                r = cmp_keys(key, &cell->key);

                parent = *new;
                if (r < 0)
                        new = &((*new)->rb_left);
                else if (r > 0)
                        new = &((*new)->rb_right);
                else {
                        /* Already occupied: queue the bio behind the holder. */
                        if (inmate)
                                bio_list_add(&cell->bios, inmate);
                        *cell_result = cell;
                        return 1;
                }
        }

        /* No overlapping cell: insert the preallocated one as holder. */
        __setup_new_cell(key, inmate, cell_prealloc);
        *cell_result = cell_prealloc;

        rb_link_node(&cell_prealloc->node, parent, new);
        rb_insert_color(&cell_prealloc->node, root);

        return 0;
}

static int bio_detain(struct dm_bio_prison *prison,
                      struct dm_cell_key *key,
                      struct bio *inmate,
                      struct dm_bio_prison_cell *cell_prealloc,
                      struct dm_bio_prison_cell **cell_result)
{
        int r;
        unsigned int l = lock_nr(key, prison->num_locks);

        spin_lock_irq(&prison->regions[l].lock);
        r = __bio_detain(&prison->regions[l].cell, key, inmate, cell_prealloc, cell_result);
        spin_unlock_irq(&prison->regions[l].lock);

        return r;
}

/*
 * Returns 1 if the key was already locked (the bio has been queued in the
 * existing cell), 0 if the caller now holds the cell.
 */
int dm_bio_detain(struct dm_bio_prison *prison,
                  struct dm_cell_key *key,
                  struct bio *inmate,
                  struct dm_bio_prison_cell *cell_prealloc,
                  struct dm_bio_prison_cell **cell_result)
{
        return bio_detain(prison, key, inmate, cell_prealloc, cell_result);
}
EXPORT_SYMBOL_GPL(dm_bio_detain);
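
/*
 * A sketch of the usual calling pattern (illustrative; the helper name is
 * an assumption, not code from an in-tree target).  A cell must be
 * preallocated before taking the lock, and freed again if the detain
 * lost the race:
 *
 *	static int example_detain(struct dm_bio_prison *prison,
 *				  struct dm_cell_key *key, struct bio *bio,
 *				  struct dm_bio_prison_cell **cell_result)
 *	{
 *		int r;
 *		struct dm_bio_prison_cell *cell;
 *
 *		cell = dm_bio_prison_alloc_cell(prison, GFP_NOIO);
 *		r = dm_bio_detain(prison, key, bio, cell, cell_result);
 *		if (r)
 *			// lost the race; the prealloc cell wasn't used
 *			dm_bio_prison_free_cell(prison, cell);
 *
 *		return r;
 *	}
 */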

/*
 * @inmates must have been initialised prior to this call.
 */
static void __cell_release(struct rb_root *root,
                           struct dm_bio_prison_cell *cell,
                           struct bio_list *inmates)
{
        rb_erase(&cell->node, root);

        if (inmates) {
                /* The holder is returned first, then any waiting bios. */
                if (cell->holder)
                        bio_list_add(inmates, cell->holder);
                bio_list_merge(inmates, &cell->bios);
        }
}

void dm_cell_release(struct dm_bio_prison *prison,
                     struct dm_bio_prison_cell *cell,
                     struct bio_list *bios)
{
        unsigned int l = lock_nr(&cell->key, prison->num_locks);

        spin_lock_irq(&prison->regions[l].lock);
        __cell_release(&prison->regions[l].cell, cell, bios);
        spin_unlock_irq(&prison->regions[l].lock);
}
EXPORT_SYMBOL_GPL(dm_cell_release);
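
/*
 * For example (a hedged sketch; resubmitting via submit_bio_noacct() is
 * an assumption about the caller's context):
 *
 *	struct bio_list bios;
 *	struct bio *bio;
 *
 *	bio_list_init(&bios);
 *	dm_cell_release(prison, cell, &bios);
 *	dm_bio_prison_free_cell(prison, cell);
 *
 *	while ((bio = bio_list_pop(&bios)))
 *		submit_bio_noacct(bio);
 */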

/*
 * Sometimes we don't want the holder, just the additional bios.
 */
static void __cell_release_no_holder(struct rb_root *root,
                                     struct dm_bio_prison_cell *cell,
                                     struct bio_list *inmates)
{
        rb_erase(&cell->node, root);
        bio_list_merge(inmates, &cell->bios);
}

void dm_cell_release_no_holder(struct dm_bio_prison *prison,
                               struct dm_bio_prison_cell *cell,
                               struct bio_list *inmates)
{
        unsigned int l = lock_nr(&cell->key, prison->num_locks);
        unsigned long flags;

        spin_lock_irqsave(&prison->regions[l].lock, flags);
        __cell_release_no_holder(&prison->regions[l].cell, cell, inmates);
        spin_unlock_irqrestore(&prison->regions[l].lock, flags);
}
EXPORT_SYMBOL_GPL(dm_cell_release_no_holder);

/*
 * Releases the cell and fails every held bio with @error.
 */
void dm_cell_error(struct dm_bio_prison *prison,
                   struct dm_bio_prison_cell *cell, blk_status_t error)
{
        struct bio_list bios;
        struct bio *bio;

        bio_list_init(&bios);
        dm_cell_release(prison, cell, &bios);

        while ((bio = bio_list_pop(&bios))) {
                bio->bi_status = error;
                bio_endio(bio);
        }
}
EXPORT_SYMBOL_GPL(dm_cell_error);

/*
 * Calls @visit_fn on the cell with the region lock held, then removes
 * the cell from the prison.
 */
void dm_cell_visit_release(struct dm_bio_prison *prison,
                           void (*visit_fn)(void *, struct dm_bio_prison_cell *),
                           void *context,
                           struct dm_bio_prison_cell *cell)
{
        unsigned int l = lock_nr(&cell->key, prison->num_locks);

        spin_lock_irq(&prison->regions[l].lock);
        visit_fn(context, cell);
        rb_erase(&cell->node, &prison->regions[l].cell);
        spin_unlock_irq(&prison->regions[l].lock);
}
EXPORT_SYMBOL_GPL(dm_cell_visit_release);

/*----------------------------------------------------------------*/

#define DEFERRED_SET_SIZE 64

struct dm_deferred_entry {
        struct dm_deferred_set *ds;
        unsigned int count;		/* outstanding inc's against this entry */
        struct list_head work_items;
};

/*
 * The entries form a ring.  current_entry is where new inc's are counted;
 * sweeper trails behind it, draining each entry's work_items once its
 * count has dropped to zero.
 */
struct dm_deferred_set {
        spinlock_t lock;
        unsigned int current_entry;
        unsigned int sweeper;
        struct dm_deferred_entry entries[DEFERRED_SET_SIZE];
};

struct dm_deferred_set *dm_deferred_set_create(void)
{
        int i;
        struct dm_deferred_set *ds;

        ds = kmalloc(sizeof(*ds), GFP_KERNEL);
        if (!ds)
                return NULL;

        spin_lock_init(&ds->lock);
        ds->current_entry = 0;
        ds->sweeper = 0;
        for (i = 0; i < DEFERRED_SET_SIZE; i++) {
                ds->entries[i].ds = ds;
                ds->entries[i].count = 0;
                INIT_LIST_HEAD(&ds->entries[i].work_items);
        }

        return ds;
}
EXPORT_SYMBOL_GPL(dm_deferred_set_create);

void dm_deferred_set_destroy(struct dm_deferred_set *ds)
{
        kfree(ds);
}
EXPORT_SYMBOL_GPL(dm_deferred_set_destroy);

struct dm_deferred_entry *dm_deferred_entry_inc(struct dm_deferred_set *ds)
{
        unsigned long flags;
        struct dm_deferred_entry *entry;

        spin_lock_irqsave(&ds->lock, flags);
        entry = ds->entries + ds->current_entry;
        entry->count++;
        spin_unlock_irqrestore(&ds->lock, flags);

        return entry;
}
EXPORT_SYMBOL_GPL(dm_deferred_entry_inc);

static unsigned int ds_next(unsigned int index)
{
        return (index + 1) % DEFERRED_SET_SIZE;
}

static void __sweep(struct dm_deferred_set *ds, struct list_head *head)
{
        /* Drain every quiesced entry between the sweeper and current_entry. */
        while ((ds->sweeper != ds->current_entry) &&
               !ds->entries[ds->sweeper].count) {
                list_splice_init(&ds->entries[ds->sweeper].work_items, head);
                ds->sweeper = ds_next(ds->sweeper);
        }

        /* The current entry may also be drained, but the sweeper stays put. */
        if ((ds->sweeper == ds->current_entry) && !ds->entries[ds->sweeper].count)
                list_splice_init(&ds->entries[ds->sweeper].work_items, head);
}

void dm_deferred_entry_dec(struct dm_deferred_entry *entry, struct list_head *head)
{
        unsigned long flags;

        spin_lock_irqsave(&entry->ds->lock, flags);
        BUG_ON(!entry->count);
        --entry->count;
        __sweep(entry->ds, head);
        spin_unlock_irqrestore(&entry->ds->lock, flags);
}
EXPORT_SYMBOL_GPL(dm_deferred_entry_dec);

/*
 * Returns 1 if the work was deferred, or 0 if there were no pending
 * entries and the job can be processed immediately.
 */
int dm_deferred_set_add_work(struct dm_deferred_set *ds, struct list_head *work)
{
        int r = 1;
        unsigned int next_entry;

        spin_lock_irq(&ds->lock);
        if ((ds->sweeper == ds->current_entry) &&
            !ds->entries[ds->current_entry].count)
                r = 0;
        else {
                list_add(work, &ds->entries[ds->current_entry].work_items);
                next_entry = ds_next(ds->current_entry);
                if (!ds->entries[next_entry].count)
                        ds->current_entry = next_entry;
        }
        spin_unlock_irq(&ds->lock);

        return r;
}
EXPORT_SYMBOL_GPL(dm_deferred_set_add_work);
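
/*
 * A hedged sketch of how the deferred set is typically used (names here
 * are illustrative, not from an in-tree target).  In-flight io takes an
 * entry, completion drops it, and anything that must wait for earlier io
 * to drain is queued with dm_deferred_set_add_work():
 *
 *	// when issuing an io:
 *	entry = dm_deferred_entry_inc(ds);
 *
 *	// on completion:
 *	LIST_HEAD(freed_work);
 *	dm_deferred_entry_dec(entry, &freed_work);
 *	// process any work items that just became runnable
 *
 *	// to defer a job until current io drains:
 *	if (!dm_deferred_set_add_work(ds, &job->list))
 *		run_job(job);	// nothing pending; run it now
 */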

/*----------------------------------------------------------------*/

static int __init dm_bio_prison_init_v1(void)
{
        _cell_cache = KMEM_CACHE(dm_bio_prison_cell, 0);
        if (!_cell_cache)
                return -ENOMEM;

        return 0;
}

static void dm_bio_prison_exit_v1(void)
{
        kmem_cache_destroy(_cell_cache);
        _cell_cache = NULL;
}

static int (*_inits[])(void) __initdata = {
        dm_bio_prison_init_v1,
        dm_bio_prison_init_v2,
};

static void (*_exits[])(void) = {
        dm_bio_prison_exit_v1,
        dm_bio_prison_exit_v2,
};

static int __init dm_bio_prison_init(void)
{
        const int count = ARRAY_SIZE(_inits);

        int r, i;

        for (i = 0; i < count; i++) {
                r = _inits[i]();
                if (r)
                        goto bad;
        }

        return 0;

bad:
        while (i--)
                _exits[i]();

        return r;
}

static void __exit dm_bio_prison_exit(void)
{
        int i = ARRAY_SIZE(_exits);

        while (i--)
                _exits[i]();
}

/*
 * module hooks
 */
module_init(dm_bio_prison_init);
module_exit(dm_bio_prison_exit);

MODULE_DESCRIPTION(DM_NAME " bio prison");
MODULE_AUTHOR("Joe Thornber <[email protected]>");
MODULE_LICENSE("GPL");