1 // SPDX-License-Identifier: GPL-2.0
2 /*
3 * Bad block management
4 *
5 * - Heavily based on MD badblocks code from Neil Brown
6 *
7 * Copyright (c) 2015, Intel Corporation.
8 */
9
10 #include <linux/badblocks.h>
11 #include <linux/seqlock.h>
12 #include <linux/device.h>
13 #include <linux/kernel.h>
14 #include <linux/module.h>
15 #include <linux/stddef.h>
16 #include <linux/types.h>
17 #include <linux/slab.h>
18
19 /*
20 * The purpose of badblocks set/clear is to manage bad blocks ranges which are
21 * identified by LBA addresses.
22 *
23 * When the caller of badblocks_set() wants to set a range of bad blocks, the
24 * setting range can be acked or unacked. The setting range may merge with,
25 * overwrite, or skip an overlapping already set range, depending on how the
26 * two ranges overlap or are adjacent, and on the acknowledgment type of each
27 * range. It gets more complicated when the setting range covers multiple
28 * already set bad block ranges, subject to the maximum length of each bad
29 * range and the limited space of the bad block table.
30 *
31 * It is difficult and unnecessary to handle all the possible situations at
32 * once. When setting a large range of bad blocks, we can divide the large
33 * range into smaller pieces whenever an overlap, the max range length, or a
34 * full bad table is encountered. Each time only a small piece of the bad
35 * range is handled, with a limited number of conditions for how it interacts
36 * with overlapping or adjacent already set bad block ranges. This way the
37 * hard, complicated problem becomes much simpler to handle properly.
38 *
39 * When setting a range of bad blocks in the bad table, the simplified
40 * situations to be considered are, (The already set bad block ranges are
41 * named with prefix E, and the setting bad block range is named with prefix S)
42 *
43 * 1) A setting range is not overlapped or adjacent to any other already set bad
44 * block range.
45 * +--------+
46 * | S |
47 * +--------+
48 * +-------------+ +-------------+
49 * | E1 | | E2 |
50 * +-------------+ +-------------+
51 * For this situation if the bad blocks table is not full, just allocate a
52 * free slot from the bad blocks table to mark the setting range S. The
53 * result is,
54 * +-------------+ +--------+ +-------------+
55 * | E1 | | S | | E2 |
56 * +-------------+ +--------+ +-------------+
57 * 2) A setting range starts exactly at a start LBA of an already set bad blocks
58 * range.
59 * 2.1) The setting range size < already set range size
60 * +--------+
61 * | S |
62 * +--------+
63 * +-------------+
64 * | E |
65 * +-------------+
66 * 2.1.1) If S and E are both acked or unacked range, the setting range S can
67 * be merged into existing bad range E. The result is,
68 * +-------------+
69 * | S |
70 * +-------------+
71 * 2.1.2) If S is unacked setting and E is acked, the setting will be denied, and
72 * the result is,
73 * +-------------+
74 * | E |
75 * +-------------+
76 * 2.1.3) If S is an acked setting and E is unacked, range S can overwrite the
77 * head of E. An extra slot from the bad blocks table will be allocated for S,
78 * and the start of E will move to the end of the inserted range S. The result is,
79 * +--------+----+
80 * | S | E |
81 * +--------+----+
82 * 2.2) The setting range size == already set range size
83 * 2.2.1) If S and E are both acked or unacked range, the setting range S can
84 * be merged into existing bad range E. The result is,
85 * +-------------+
86 * | S |
87 * +-------------+
88 * 2.2.2) If S is unacked setting and E is acked, the setting will be denied, and
89 * the result is,
90 * +-------------+
91 * | E |
92 * +-------------+
93 * 2.2.3) If S is an acked setting and E is unacked, range S can overwrite all
94 * of bad blocks range E. The result is,
95 * +-------------+
96 * | S |
97 * +-------------+
98 * 2.3) The setting range size > already set range size
99 * +-------------------+
100 * | S |
101 * +-------------------+
102 * +-------------+
103 * | E |
104 * +-------------+
105 * For this situation, the setting range S can be treated as two parts: the
106 * first part (S1) has the same size as the already set range E, and the
107 * second part (S2) is the rest of the setting range.
108 * +-------------+-----+ +-------------+ +-----+
109 * | S1 | S2 | | S1 | | S2 |
110 * +-------------+-----+ ===> +-------------+ +-----+
111 * +-------------+ +-------------+
112 * | E | | E |
113 * +-------------+ +-------------+
114 * Now we only focus on how to handle the setting range S1 and the already
115 * set range E, which is already explained in 2.2); the rest part S2 will be
116 * handled later in the next loop.
117 * 3) A setting range starts before the start LBA of an already set bad blocks
118 * range.
119 * +-------------+
120 * | S |
121 * +-------------+
122 * +-------------+
123 * | E |
124 * +-------------+
125 * For this situation, the setting range S can be divided into two parts, the
126 * first (S1) ends at the start LBA of already set range E, the second part
127 * (S2) starts exactly at a start LBA of the already set range E.
128 * +----+---------+ +----+ +---------+
129 * | S1 | S2 | | S1 | | S2 |
130 * +----+---------+ ===> +----+ +---------+
131 * +-------------+ +-------------+
132 * | E | | E |
133 * +-------------+ +-------------+
134 * Now only the first part S1 should be handled in this loop, which is a
135 * similar condition to 1). The rest part S2 has exactly the same start LBA
136 * as the already set range E, so it will be handled in the next loop as one
137 * of the situations in 2).
138 * 4) A setting range starts after the start LBA of an already set bad blocks
139 * range.
140 * 4.1) If the setting range S exactly matches the tail part of already set bad
141 * blocks range E, like the following chart shows,
142 * +---------+
143 * | S |
144 * +---------+
145 * +-------------+
146 * | E |
147 * +-------------+
148 * 4.1.1) If range S and E have same acknowledge value (both acked or unacked),
149 * they will be merged into one, the result is,
150 * +-------------+
151 * | S |
152 * +-------------+
153 * 4.1.2) If range E is acked and the setting range S is unacked, the setting
154 * request of S will be rejected, the result is,
155 * +-------------+
156 * | E |
157 * +-------------+
158 * 4.1.3) If range E is unacked, and the setting range S is acked, then S may
159 * overwrite the overlapped range of E, the result is,
160 * +---+---------+
161 * | E | S |
162 * +---+---------+
163 * 4.2) If the setting range S stays in middle of an already set range E, like
164 * the following chart shows,
165 * +----+
166 * | S |
167 * +----+
168 * +--------------+
169 * | E |
170 * +--------------+
171 * 4.2.1) If range S and E have same acknowledge value (both acked or unacked),
172 * they will be merged into one, the result is,
173 * +--------------+
174 * | S |
175 * +--------------+
176 * 4.2.2) If range E is acked and the setting range S is unacked, the setting
177 * request of S will be rejected, the result is also,
178 * +--------------+
179 * | E |
180 * +--------------+
181 * 4.2.3) If range E is unacked, and the setting range S is acked, then S will
182 * inserted into middle of E and split previous range E into two parts (E1
183 * and E2), the result is,
184 * +----+----+----+
185 * | E1 | S | E2 |
186 * +----+----+----+
187 * 4.3) If the setting bad blocks range S is overlapped with an already set bad
188 * blocks range E. The range S starts after the start LBA of range E, and
189 * ends after the end LBA of range E, as the following chart shows,
190 * +-------------------+
191 * | S |
192 * +-------------------+
193 * +-------------+
194 * | E |
195 * +-------------+
196 * For this situation the range S can be divided into two parts: the first
197 * part (S1) ends at the end of range E, and the second part (S2) covers the
198 * rest of the original S.
199 * +---------+---------+ +---------+ +---------+
200 * | S1 | S2 | | S1 | | S2 |
201 * +---------+---------+ ===> +---------+ +---------+
202 * +-------------+ +-------------+
203 * | E | | E |
204 * +-------------+ +-------------+
205 * Now in this loop the setting range S1 and already set range E can be
206 * handled as the situations 4.1), the rest range S2 will be handled in next
207 * loop and ignored in this loop.
208 * 5) A setting bad blocks range S is adjacent to one or more already set bad
209 * blocks range(s), and they are all acked or unacked range.
210 * 5.1) Front merge: If the already set bad blocks range E is before setting
211 * range S and they are adjacent,
212 * +------+
213 * | S |
214 * +------+
215 * +-------+
216 * | E |
217 * +-------+
218 * 5.1.1) When the total size of ranges S and E <= BB_MAX_LEN, and their
219 * acknowledge values are the same, the setting range S can be front merged
220 * into range E. The result is,
221 * +--------------+
222 * | S |
223 * +--------------+
224 * 5.1.2) Otherwise these two ranges cannot merge, just insert the setting
225 * range S right after already set range E into the bad blocks table. The
226 * result is,
227 * +--------+------+
228 * | E | S |
229 * +--------+------+
230 * 6) Special cases which the above conditions cannot handle
231 * 6.1) Multiple already set ranges may merge into fewer ones in a full bad table
232 * +-------------------------------------------------------+
233 * | S |
234 * +-------------------------------------------------------+
235 * |<----- BB_MAX_LEN ----->|
236 * +-----+ +-----+ +-----+
237 * | E1 | | E2 | | E3 |
238 * +-----+ +-----+ +-----+
239 * In the above example, when the bad blocks table is full, inserting the
240 * first part of the setting range S will fail because no more slots can be
241 * allocated from the bad blocks table. In this situation a proper setting
242 * method should go through the whole setting bad blocks range and look for
243 * chances to merge already set ranges into fewer ones. Once a slot becomes
244 * available in the bad blocks table, retry to handle as many of the
245 * remaining setting bad blocks ranges as possible.
246 * +------------------------+
247 * | S3 |
248 * +------------------------+
249 * |<----- BB_MAX_LEN ----->|
250 * +-----+-----+-----+---+-----+--+
251 * | S1 | S2 |
252 * +-----+-----+-----+---+-----+--+
253 * The above chart shows that although the first part (S3) cannot be inserted
254 * due to lack of space in the bad blocks table, the following E1, E2 and E3
255 * ranges can be merged with the rest of S into fewer ranges S1 and S2. Now
256 * there is 1 free slot in the bad blocks table.
257 * +------------------------+-----+-----+-----+---+-----+--+
258 * | S3 | S1 | S2 |
259 * +------------------------+-----+-----+-----+---+-----+--+
260 * Since the bad blocks table is not full anymore, retry the original
261 * setting range S. Now the setting range S3 can be inserted into the bad
262 * blocks table using the slot freed by merging multiple ranges.
263 * 6.2) Front merge after overwrite
264 * In the following example, in bad blocks table, E1 is an acked bad blocks
265 * range and E2 is an unacked bad blocks range, therefore they are not able
266 * to merge into a larger range. The setting bad blocks range S is acked,
267 * therefore part of E2 can be overwritten by S.
268 * +--------+
269 * | S | acknowledged
270 * +--------+ S: 1
271 * +-------+-------------+ E1: 1
272 * | E1 | E2 | E2: 0
273 * +-------+-------------+
274 * With previous simplified routines, after overwriting part of E2 with S,
275 * the bad blocks table should be (E3 is remaining part of E2 which is not
276 * overwritten by S),
277 * acknowledged
278 * +-------+--------+----+ S: 1
279 * | E1 | S | E3 | E1: 1
280 * +-------+--------+----+ E3: 0
281 * The above result is correct but not perfect. Ranges E1 and S in the bad
282 * blocks table are both acked; merging them into one larger range may
283 * occupy less bad blocks table space and make badblocks_check() faster.
284 * Therefore in such a situation, after overwriting with range S, the previous
285 * range E1 should be checked for a possible front combination. Then the
286 * ideal result can be,
287 * +----------------+----+ acknowledged
288 * | E1 | E3 | E1: 1
289 * +----------------+----+ E3: 0
290 * 6.3) Behind merge: the already set bad blocks range E is behind the setting
291 * range S and they are adjacent. Normally we don't need to care about this
292 * because front merge handles it while going through range S from head to
293 * tail, except for the tail part of range S. Once the setting range S is
294 * fully handled, none of the above simplified routines check whether the
295 * tail LBA of range S is adjacent to the next already set range, so they are
296 * not merged even when it is possible.
297 * +------+
298 * | S |
299 * +------+
300 * +-------+
301 * | E |
302 * +-------+
303 * For the above special situation, when the setting range S is fully handled
304 * and the loop ends, an extra check is necessary for whether the next already
305 * set range E is right after S and mergeable.
306 * 6.3.1) When the total size of ranges E and S <= BB_MAX_LEN, and their
307 * acknowledge values are the same, the setting range S can be behind merged
308 * into range E. The result is,
309 * +--------------+
310 * | S |
311 * +--------------+
312 * 6.3.2) Otherwise these two ranges cannot merge, just insert the setting range
313 * S in front of the already set range E in the bad blocks table. The result
314 * is,
315 * +------+-------+
316 * | S | E |
317 * +------+-------+
318 *
319 * All the above 5 simplified situations and 3 special cases may cover 99%+ of
320 * the bad block range setting conditions. There may be some rare corner case
321 * which is not considered or optimized, but it won't hurt if badblocks_set()
322 * fails due to no space, or some ranges are not merged to save table space.
323 *
324 * Inside badblocks_set() each loop starts by jumping to the re_insert label.
325 * Every new loop calls prev_badblocks() to find an already set range which
326 * starts before or at the current setting range. Since the setting bad blocks
327 * range is handled from head to tail, in most cases it is unnecessary to do
328 * the binary search inside prev_badblocks(); it is possible to provide a hint
329 * to prev_badblocks() for a fast path, so the expensive binary search can be
330 * avoided. In my test with the hint to prev_badblocks(), except for the first
331 * loop, all remaining calls to prev_badblocks() go into the fast path and
332 * return the correct bad blocks table index immediately.
333 *
334 *
335 * Clearing a bad blocks range from the bad block table follows a similar idea
336 * to setting, but is much simpler. The only thing to be noticed is that when
337 * the clearing range hits the middle of a bad block range, the existing bad
338 * block range will be split into two, and one more item must be added to the
339 * bad block table. The simplified situations to be considered are, (The already
340 * set bad block ranges in the bad block table are named with prefix E, and the
341 * clearing bad blocks range is named with prefix C)
342 *
343 * 1) A clearing range does not overlap any already set range in the bad block
344 * table.
345 * +-----+ | +-----+ | +-----+
346 * | C | | | C | | | C |
347 * +-----+ or +-----+ or +-----+
348 * +---+ | +----+ +----+ | +---+
349 * | E | | | E1 | | E2 | | | E |
350 * +---+ | +----+ +----+ | +---+
351 * For the above situations, no bad block is cleared and no failure
352 * happens; the clear request simply succeeds.
353 * 2) The clearing range hits the middle of an already set bad blocks range in
354 * the bad block table.
355 * +---+
356 * | C |
357 * +---+
358 * +-----------------+
359 * | E |
360 * +-----------------+
361 * In this situation if the bad block table is not full, the range E will be
362 * split into two ranges E1 and E2. The result is,
363 * +------+ +------+
364 * | E1 | | E2 |
365 * +------+ +------+
366 * 3) The clearing range starts exactly at same LBA as an already set bad block range
367 * from the bad block table.
368 * 3.1) Partially covered at head part
369 * +------------+
370 * | C |
371 * +------------+
372 * +-----------------+
373 * | E |
374 * +-----------------+
375 * For this situation, the overlapped already set range will update its
376 * start LBA to the end of C and shrink its length to BB_LEN(E) - BB_LEN(C).
377 * No item is deleted from the bad block table. The result is,
378 * +----+
379 * | E1 |
380 * +----+
381 * 3.2) Exact fully covered
382 * +-----------------+
383 * | C |
384 * +-----------------+
385 * +-----------------+
386 * | E |
387 * +-----------------+
388 * For this situation the whole bad blocks range E will be cleared and its
389 * corresponding item is deleted from the bad block table.
390 * 4) The clearing range exactly ends at same LBA as an already set bad block
391 * range.
392 * +-------+
393 * | C |
394 * +-------+
395 * +-----------------+
396 * | E |
397 * +-----------------+
398 * For the above situation, the already set range E is updated to shrink its
399 * end to the start of C, and reduce its length to BB_LEN(E) - BB_LEN(C).
400 * The result is,
401 * +---------+
402 * | E |
403 * +---------+
404 * 5) The clearing range is partially overlapped with an already set bad block
405 * range from the bad block table.
406 * 5.1) The already set bad block range is front overlapped with the clearing
407 * range.
408 * +----------+
409 * | C |
410 * +----------+
411 * +------------+
412 * | E |
413 * +------------+
414 * For such situation, the clearing range C can be treated as two parts. The
415 * first part ends at the start LBA of range E, and the second part starts at
416 * same LBA of range E.
417 * +----+-----+ +----+ +-----+
418 * | C1 | C2 | | C1 | | C2 |
419 * +----+-----+ ===> +----+ +-----+
420 * +------------+ +------------+
421 * | E | | E |
422 * +------------+ +------------+
423 * Now the first part C1 can be handled as condition 1), and the second part
424 * C2 can be handled as condition 3.1) in the next loop.
425 * 5.2) The already set bad block range is behind overlapped with the clearing
426 * range.
427 * +----------+
428 * | C |
429 * +----------+
430 * +------------+
431 * | E |
432 * +------------+
433 * For such a situation, the clearing range C can be treated as two parts. The
434 * first part C1 ends at the same end LBA as range E, and the second part
435 * starts at the end LBA of range E.
436 * +----+-----+ +----+ +-----+
437 * | C1 | C2 | | C1 | | C2 |
438 * +----+-----+ ===> +----+ +-----+
439 * +------------+ +------------+
440 * | E | | E |
441 * +------------+ +------------+
442 * Now the first part clearing range C1 can be handled as condition 4), and
443 * the second part clearing range C2 can be handled as condition 1) in next
444 * loop.
445 *
446 * All bad blocks range clearing can be simplified into the above 5 situations
447 * by only handling the head part of the clearing range in each run of the
448 * while-loop. The idea is similar to bad blocks range setting but much
449 * simpler.
450 */
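
/*
 * Illustrative sketch (not part of the original code): how the set rules
 * described above look from a caller's point of view. The LBAs, lengths and
 * ack values are made up; only the public helpers declared in
 * <linux/badblocks.h> and defined later in this file are used.
 */
static void __maybe_unused badblocks_set_rules_example(void)
{
	struct badblocks bb = {};

	if (badblocks_init(&bb, 1))
		return;

	/* Nothing overlaps yet: a new slot is allocated for the range. */
	badblocks_set(&bb, 512, 8, 0);	/* unacked E covering 512..519 */

	/* Case 2.1.3): an acked set overwrites the head of an unacked E. */
	badblocks_set(&bb, 512, 4, 1);	/* acked S covering 512..515 */

	/* Cases 2.1.2)/2.2.2): an unacked set cannot downgrade an acked range. */
	badblocks_set(&bb, 512, 4, 0);	/* no effect on the acked entry */

	badblocks_exit(&bb);
}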
451
452 /*
453 * Find the range that starts at or before 's' from the bad table. The
454 * search starts from index 'hint' and stops at index 'hint_end' of the
455 * bad table.
456 */
457 static int prev_by_hint(struct badblocks *bb, sector_t s, int hint)
458 {
459 int hint_end = hint + 2;
460 u64 *p = bb->page;
461 int ret = -1;
462
463 while ((hint < hint_end) && ((hint + 1) <= bb->count) &&
464 (BB_OFFSET(p[hint]) <= s)) {
465 if ((hint + 1) == bb->count || BB_OFFSET(p[hint + 1]) > s) {
466 ret = hint;
467 break;
468 }
469 hint++;
470 }
471
472 return ret;
473 }
474
475 /*
476 * Find the range that starts at or before bad->start. If 'hint' is
477 * provided (hint >= 0) then search the bad table from 'hint' first. It is
478 * very likely that the wanted bad range can be found from the hint index,
479 * so the unnecessary while-loop iteration can be avoided.
480 */
481 static int prev_badblocks(struct badblocks *bb, struct badblocks_context *bad,
482 int hint)
483 {
484 sector_t s = bad->start;
485 int ret = -1;
486 int lo, hi;
487 u64 *p;
488
489 if (!bb->count)
490 goto out;
491
492 if (hint >= 0) {
493 ret = prev_by_hint(bb, s, hint);
494 if (ret >= 0)
495 goto out;
496 }
497
498 lo = 0;
499 hi = bb->count;
500 p = bb->page;
501
502 /* The following bisect search might be unnecessary */
503 if (BB_OFFSET(p[lo]) > s)
504 return -1;
505 if (BB_OFFSET(p[hi - 1]) <= s)
506 return hi - 1;
507
508 /* Do bisect search in bad table */
509 while (hi - lo > 1) {
510 int mid = (lo + hi)/2;
511 sector_t a = BB_OFFSET(p[mid]);
512
513 if (a == s) {
514 ret = mid;
515 goto out;
516 }
517
518 if (a < s)
519 lo = mid;
520 else
521 hi = mid;
522 }
523
524 if (BB_OFFSET(p[lo]) <= s)
525 ret = lo;
526 out:
527 return ret;
528 }
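
/*
 * Worked example (illustrative, hypothetical table contents): with a bad
 * table of three ranges starting at sectors 0, 16 and 48, prev_badblocks()
 * returns index 1 for bad->start == 20, index 1 for bad->start == 40 (the
 * last range starting at or before 40), and index 2 for bad->start == 70.
 * Passing the previous return value back in as 'hint' usually lets
 * prev_by_hint() resolve the next lookup without the binary search.
 */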
529
530 /*
531 * Return 'true' if the range indicated by 'bad' can be forward
532 * merged with the bad range (from the bad table) indexed by 'prev'.
533 */
534 static bool can_merge_front(struct badblocks *bb, int prev,
535 struct badblocks_context *bad)
536 {
537 sector_t s = bad->start;
538 u64 *p = bb->page;
539
540 if (BB_ACK(p[prev]) == bad->ack &&
541 (s < BB_END(p[prev]) ||
542 (s == BB_END(p[prev]) && (BB_LEN(p[prev]) < BB_MAX_LEN))))
543 return true;
544 return false;
545 }
546
547 /*
548 * Do forward merge for range indicated by 'bad' and the bad range
549 * (from bad table) indexed by 'prev'. The return value is sectors
550 * merged from bad->len.
551 */
552 static int front_merge(struct badblocks *bb, int prev, struct badblocks_context *bad)
553 {
554 sector_t sectors = bad->len;
555 sector_t s = bad->start;
556 u64 *p = bb->page;
557 int merged = 0;
558
559 WARN_ON(s > BB_END(p[prev]));
560
561 if (s < BB_END(p[prev])) {
562 merged = min_t(sector_t, sectors, BB_END(p[prev]) - s);
563 } else {
564 merged = min_t(sector_t, sectors, BB_MAX_LEN - BB_LEN(p[prev]));
565 if ((prev + 1) < bb->count &&
566 merged > (BB_OFFSET(p[prev + 1]) - BB_END(p[prev]))) {
567 merged = BB_OFFSET(p[prev + 1]) - BB_END(p[prev]);
568 }
569
570 p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
571 BB_LEN(p[prev]) + merged, bad->ack);
572 }
573
574 return merged;
575 }
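
/*
 * Worked example (illustrative numbers): assume p[prev] covers sectors
 * 100..119 and BB_MAX_LEN is larger than 28.
 * - bad->start == 110, bad->len == 30: s < BB_END(p[prev]), so front_merge()
 *   only reports the 10 sectors of 'bad' already covered by p[prev].
 * - bad->start == 120, bad->len == 8: s == BB_END(p[prev]), so p[prev] is
 *   extended to cover 100..127 and 8 merged sectors are returned.
 */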
576
577 /*
578 * 'Combine' is a special case which can_merge_front() is not able to
579 * handle: if a bad range (indexed by 'prev' from the bad table) starts
580 * exactly at bad->start, and the bad range ahead of 'prev' (indexed by
581 * 'prev - 1' from the bad table) ends exactly where 'prev' starts, and
582 * the sum of their lengths does not exceed the BB_MAX_LEN limitation,
583 * then these two bad ranges (from the bad table) can be combined.
584 *
585 * Return 'true' if bad ranges indexed by 'prev' and 'prev - 1' from bad
586 * table can be combined.
587 */
588 static bool can_combine_front(struct badblocks *bb, int prev,
589 struct badblocks_context *bad)
590 {
591 u64 *p = bb->page;
592
593 if ((prev > 0) &&
594 (BB_OFFSET(p[prev]) == bad->start) &&
595 (BB_END(p[prev - 1]) == BB_OFFSET(p[prev])) &&
596 (BB_LEN(p[prev - 1]) + BB_LEN(p[prev]) <= BB_MAX_LEN) &&
597 (BB_ACK(p[prev - 1]) == BB_ACK(p[prev])))
598 return true;
599 return false;
600 }
601
602 /*
603 * Combine the bad ranges indexed by 'prev' and 'prev - 1' (from bad
604 * table) into one larger bad range, and the new range is indexed by
605 * 'prev - 1'.
606 * The caller of front_combine() will decrease bb->count, therefore
607 * it is unnecessary to clear p[prev] after front merge.
608 */
609 static void front_combine(struct badblocks *bb, int prev)
610 {
611 u64 *p = bb->page;
612
613 p[prev - 1] = BB_MAKE(BB_OFFSET(p[prev - 1]),
614 BB_LEN(p[prev - 1]) + BB_LEN(p[prev]),
615 BB_ACK(p[prev]));
616 if ((prev + 1) < bb->count)
617 memmove(p + prev, p + prev + 1, (bb->count - prev - 1) * 8);
618 }
619
620 /*
621 * Return 'true' if the range indicated by 'bad' is exactly forward
622 * overlapped with the bad range (from bad table) indexed by 'front'.
623 * Exactly forward overlap means the bad range (from the bad table) indexed
624 * by 'front' does not cover the whole range indicated by 'bad'.
625 */
626 static bool overlap_front(struct badblocks *bb, int front,
627 struct badblocks_context *bad)
628 {
629 u64 *p = bb->page;
630
631 if (bad->start >= BB_OFFSET(p[front]) &&
632 bad->start < BB_END(p[front]))
633 return true;
634 return false;
635 }
636
637 /*
638 * Return 'true' if the range indicated by 'bad' is exactly backward
639 * overlapped with the bad range (from bad table) indexed by 'behind'.
640 */
641 static bool overlap_behind(struct badblocks *bb, struct badblocks_context *bad,
642 int behind)
643 {
644 u64 *p = bb->page;
645
646 if (bad->start < BB_OFFSET(p[behind]) &&
647 (bad->start + bad->len) > BB_OFFSET(p[behind]))
648 return true;
649 return false;
650 }
651
652 /*
653 * Return 'true' if the range indicated by 'bad' can overwrite the bad
654 * range (from bad table) indexed by 'prev'.
655 *
656 * The range indicated by 'bad' can overwrite the bad range indexed by
657 * 'prev' when,
658 * 1) The whole range indicated by 'bad' can cover partial or whole bad
659 * range (from bad table) indexed by 'prev'.
660 * 2) The ack value of 'bad' is larger than the ack value of the bad
661 * range 'prev'.
662 *
663 * If the overwriting doesn't cover the whole bad range (from bad table)
664 * indexed by 'prev', new range might be split from existing bad range,
665 * 1) The overwrite covers head or tail part of existing bad range, 1
666 * extra bad range will be split and added into the bad table.
667 * 2) The overwrite covers middle of existing bad range, 2 extra bad
668 * ranges will be split (ahead and after the overwritten range) and
669 * added into the bad table.
670 * The number of extra split ranges of the overwriting is stored in
671 * 'extra' and returned for the caller.
672 */
673 static bool can_front_overwrite(struct badblocks *bb, int prev,
674 struct badblocks_context *bad, int *extra)
675 {
676 u64 *p = bb->page;
677 int len;
678
679 WARN_ON(!overlap_front(bb, prev, bad));
680
681 if (BB_ACK(p[prev]) >= bad->ack)
682 return false;
683
684 if (BB_END(p[prev]) <= (bad->start + bad->len)) {
685 len = BB_END(p[prev]) - bad->start;
686 if (BB_OFFSET(p[prev]) == bad->start)
687 *extra = 0;
688 else
689 *extra = 1;
690
691 bad->len = len;
692 } else {
693 if (BB_OFFSET(p[prev]) == bad->start)
694 *extra = 1;
695 else
696 /*
697 * prev range will be split into two; besides the overwritten
698 * one, an extra slot is needed from the bad table.
699 */
700 *extra = 2;
701 }
702
703 if ((bb->count + (*extra)) > MAX_BADBLOCKS)
704 return false;
705
706 return true;
707 }
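
/*
 * Worked example for the 'extra' value (illustrative numbers): let p[prev]
 * be an unacked range covering sectors 100..131 and 'bad' an acked range.
 * - bad covers 100..131 (or beyond): p[prev] is overwritten in place,
 *   *extra == 0.
 * - bad covers 100..115: the unacked tail 116..131 keeps its own slot,
 *   *extra == 1.
 * - bad covers 110..120: both the head 100..109 and the tail 121..131 need
 *   slots, *extra == 2.
 */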
708
709 /*
710 * Do the overwrite from the range indicated by 'bad' to the bad range
711 * (from bad table) indexed by 'prev'.
712 * The previously called can_front_overwrite() will provide how many
713 * extra bad range(s) might be split and added into the bad table. All
714 * the splitting cases in the bad table will be handled here.
715 */
716 static int front_overwrite(struct badblocks *bb, int prev,
717 struct badblocks_context *bad, int extra)
718 {
719 u64 *p = bb->page;
720 sector_t orig_end = BB_END(p[prev]);
721 int orig_ack = BB_ACK(p[prev]);
722
723 switch (extra) {
724 case 0:
725 p[prev] = BB_MAKE(BB_OFFSET(p[prev]), BB_LEN(p[prev]),
726 bad->ack);
727 break;
728 case 1:
729 if (BB_OFFSET(p[prev]) == bad->start) {
730 p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
731 bad->len, bad->ack);
732 memmove(p + prev + 2, p + prev + 1,
733 (bb->count - prev - 1) * 8);
734 p[prev + 1] = BB_MAKE(bad->start + bad->len,
735 orig_end - BB_END(p[prev]),
736 orig_ack);
737 } else {
738 p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
739 bad->start - BB_OFFSET(p[prev]),
740 orig_ack);
741 /*
742 * prev +2 -> prev + 1 + 1, which is for,
743 * 1) prev + 1: the slot index of the previous one
744 * 2) + 1: one more slot for extra being 1.
745 */
746 memmove(p + prev + 2, p + prev + 1,
747 (bb->count - prev - 1) * 8);
748 p[prev + 1] = BB_MAKE(bad->start, bad->len, bad->ack);
749 }
750 break;
751 case 2:
752 p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
753 bad->start - BB_OFFSET(p[prev]),
754 orig_ack);
755 /*
756 * prev + 3 -> prev + 1 + 2, which is for,
757 * 1) prev + 1: the slot index of the previous one
758 * 2) + 2: two more slots for extra being 2.
759 */
760 memmove(p + prev + 3, p + prev + 1,
761 (bb->count - prev - 1) * 8);
762 p[prev + 1] = BB_MAKE(bad->start, bad->len, bad->ack);
763 p[prev + 2] = BB_MAKE(BB_END(p[prev + 1]),
764 orig_end - BB_END(p[prev + 1]),
765 orig_ack);
766 break;
767 default:
768 break;
769 }
770
771 return bad->len;
772 }
773
774 /*
775 * Explicitly insert a range indicated by 'bad' to the bad table, where
776 * the location is indexed by 'at'.
777 */
778 static int insert_at(struct badblocks *bb, int at, struct badblocks_context *bad)
779 {
780 u64 *p = bb->page;
781 int len;
782
783 WARN_ON(badblocks_full(bb));
784
785 len = min_t(sector_t, bad->len, BB_MAX_LEN);
786 if (at < bb->count)
787 memmove(p + at + 1, p + at, (bb->count - at) * 8);
788 p[at] = BB_MAKE(bad->start, len, bad->ack);
789
790 return len;
791 }
792
793 static void badblocks_update_acked(struct badblocks *bb)
794 {
795 bool unacked = false;
796 u64 *p = bb->page;
797 int i;
798
799 if (!bb->unacked_exist)
800 return;
801
802 for (i = 0; i < bb->count ; i++) {
803 if (!BB_ACK(p[i])) {
804 unacked = true;
805 break;
806 }
807 }
808
809 if (!unacked)
810 bb->unacked_exist = 0;
811 }
812
813 /*
814 * Try to combine the bad table range at 'prev' with the next adjacent range
815 * (same ack value, combined length within BB_MAX_LEN). Return true on success.
816 */
817 static bool try_adjacent_combine(struct badblocks *bb, int prev)
818 {
819 u64 *p = bb->page;
820
821 if (prev >= 0 && (prev + 1) < bb->count &&
822 BB_END(p[prev]) == BB_OFFSET(p[prev + 1]) &&
823 (BB_LEN(p[prev]) + BB_LEN(p[prev + 1])) <= BB_MAX_LEN &&
824 BB_ACK(p[prev]) == BB_ACK(p[prev + 1])) {
825 p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
826 BB_LEN(p[prev]) + BB_LEN(p[prev + 1]),
827 BB_ACK(p[prev]));
828
829 if ((prev + 2) < bb->count)
830 memmove(p + prev + 1, p + prev + 2,
831 (bb->count - (prev + 2)) * 8);
832 bb->count--;
833 return true;
834 }
835 return false;
836 }
837
838 /* Do exact work to set bad block range into the bad block table */
839 static bool _badblocks_set(struct badblocks *bb, sector_t s, sector_t sectors,
840 int acknowledged)
841 {
842 int len = 0, added = 0;
843 struct badblocks_context bad;
844 int prev = -1, hint = -1;
845 unsigned long flags;
846 u64 *p;
847
848 if (bb->shift < 0)
849 /* badblocks are disabled */
850 return false;
851
852 if (sectors == 0)
853 /* Invalid sectors number */
854 return false;
855
856 if (bb->shift) {
857 /* round the start down, and the end up */
858 sector_t next = s + sectors;
859
860 s = rounddown(s, 1 << bb->shift);
861 next = roundup(next, 1 << bb->shift);
862 sectors = next - s;
863 }
864
865 write_seqlock_irqsave(&bb->lock, flags);
866
867 bad.ack = acknowledged;
868 p = bb->page;
869
870 re_insert:
871 bad.start = s;
872 bad.len = sectors;
873 len = 0;
874
875 if (badblocks_full(bb))
876 goto out;
877
878 if (badblocks_empty(bb)) {
879 len = insert_at(bb, 0, &bad);
880 bb->count++;
881 added++;
882 goto update_sectors;
883 }
884
885 prev = prev_badblocks(bb, &bad, hint);
886
887 /* start before all badblocks */
888 if (prev < 0) {
889 /* insert on the first */
890 if (bad.len > (BB_OFFSET(p[0]) - bad.start))
891 bad.len = BB_OFFSET(p[0]) - bad.start;
892 len = insert_at(bb, 0, &bad);
893 bb->count++;
894 added++;
895 hint = ++prev;
896 goto update_sectors;
897 }
898
899 /* in case p[prev-1] can be merged with p[prev] */
900 if (can_combine_front(bb, prev, &bad)) {
901 front_combine(bb, prev);
902 bb->count--;
903 added++;
904 hint = prev;
905 goto update_sectors;
906 }
907
908 if (overlap_front(bb, prev, &bad)) {
909 if (can_merge_front(bb, prev, &bad)) {
910 len = front_merge(bb, prev, &bad);
911 added++;
912 } else {
913 int extra = 0;
914
915 if (!can_front_overwrite(bb, prev, &bad, &extra)) {
916 if (extra > 0)
917 goto out;
918
919 len = min_t(sector_t,
920 BB_END(p[prev]) - s, sectors);
921 hint = prev;
922 goto update_sectors;
923 }
924
925 len = front_overwrite(bb, prev, &bad, extra);
926 added++;
927 bb->count += extra;
928
929 if (can_combine_front(bb, prev, &bad)) {
930 front_combine(bb, prev);
931 bb->count--;
932 }
933 }
934 hint = prev;
935 goto update_sectors;
936 }
937
938 if (can_merge_front(bb, prev, &bad)) {
939 len = front_merge(bb, prev, &bad);
940 added++;
941 hint = prev;
942 goto update_sectors;
943 }
944
945 /* cannot merge and there is space in bad table */
946 if ((prev + 1) < bb->count &&
947 overlap_behind(bb, &bad, prev + 1))
948 bad.len = min_t(sector_t,
949 bad.len, BB_OFFSET(p[prev + 1]) - bad.start);
950
951 len = insert_at(bb, prev + 1, &bad);
952 bb->count++;
953 added++;
954 hint = ++prev;
955
956 update_sectors:
957 s += len;
958 sectors -= len;
959
960 if (sectors > 0)
961 goto re_insert;
962
963 /*
964 * Check whether the following already set range can be
965 * merged. (prev < 0) condition is not handled here,
966 * because it's already complicated enough.
967 */
968 try_adjacent_combine(bb, prev);
969
970 out:
971 if (added) {
972 set_changed(bb);
973
974 if (!acknowledged)
975 bb->unacked_exist = 1;
976 else
977 badblocks_update_acked(bb);
978 }
979
980 write_sequnlock_irqrestore(&bb->lock, flags);
981
982 return sectors == 0;
983 }
984
985 /*
986 * Clear the bad block range from bad block table which is front overlapped
987 * with the clearing range. The return value is how many sectors from an
988 * already set bad block range are cleared. If the whole bad block range is
989 * covered by the clearing range and fully cleared, 'deleted' is set to 1 for
990 * the caller to reduce bb->count.
991 */
992 static int front_clear(struct badblocks *bb, int prev,
993 struct badblocks_context *bad, int *deleted)
994 {
995 sector_t sectors = bad->len;
996 sector_t s = bad->start;
997 u64 *p = bb->page;
998 int cleared = 0;
999
1000 *deleted = 0;
1001 if (s == BB_OFFSET(p[prev])) {
1002 if (BB_LEN(p[prev]) > sectors) {
1003 p[prev] = BB_MAKE(BB_OFFSET(p[prev]) + sectors,
1004 BB_LEN(p[prev]) - sectors,
1005 BB_ACK(p[prev]));
1006 cleared = sectors;
1007 } else {
1008 /* BB_LEN(p[prev]) <= sectors */
1009 cleared = BB_LEN(p[prev]);
1010 if ((prev + 1) < bb->count)
1011 memmove(p + prev, p + prev + 1,
1012 (bb->count - prev - 1) * 8);
1013 *deleted = 1;
1014 }
1015 } else if (s > BB_OFFSET(p[prev])) {
1016 if (BB_END(p[prev]) <= (s + sectors)) {
1017 cleared = BB_END(p[prev]) - s;
1018 p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
1019 s - BB_OFFSET(p[prev]),
1020 BB_ACK(p[prev]));
1021 } else {
1022 /* Splitting is handled in front_splitting_clear() */
1023 BUG();
1024 }
1025 }
1026
1027 return cleared;
1028 }
1029
1030 /*
1031 * Handle the condition that the clearing range hits middle of an already set
1032 * bad block range from bad block table. In this condition the existing bad
1033 * block range is split into two after the middle part is cleared.
1034 */
1035 static int front_splitting_clear(struct badblocks *bb, int prev,
1036 struct badblocks_context *bad)
1037 {
1038 u64 *p = bb->page;
1039 u64 end = BB_END(p[prev]);
1040 int ack = BB_ACK(p[prev]);
1041 sector_t sectors = bad->len;
1042 sector_t s = bad->start;
1043
1044 p[prev] = BB_MAKE(BB_OFFSET(p[prev]),
1045 s - BB_OFFSET(p[prev]),
1046 ack);
1047 memmove(p + prev + 2, p + prev + 1, (bb->count - prev - 1) * 8);
1048 p[prev + 1] = BB_MAKE(s + sectors, end - s - sectors, ack);
1049 return sectors;
1050 }
1051
1052 /* Do the exact work to clear bad block range from the bad block table */
1053 static bool _badblocks_clear(struct badblocks *bb, sector_t s, sector_t sectors)
1054 {
1055 struct badblocks_context bad;
1056 int prev = -1, hint = -1;
1057 int len = 0, cleared = 0;
1058 u64 *p;
1059
1060 if (bb->shift < 0)
1061 /* badblocks are disabled */
1062 return false;
1063
1064 if (sectors == 0)
1065 /* Invalid sectors number */
1066 return false;
1067
1068 if (bb->shift) {
1069 sector_t target;
1070
1071 /* When clearing we round the start up and the end down.
1072 * This should not matter as the shift should align with
1073 * the block size and no rounding should ever be needed.
1074 * However it is better to think a block is bad when it
1075 * isn't than to think a block is not bad when it is.
1076 */
1077 target = s + sectors;
1078 s = roundup(s, 1 << bb->shift);
1079 target = rounddown(target, 1 << bb->shift);
1080 sectors = target - s;
1081 }
1082
1083 write_seqlock_irq(&bb->lock);
1084
1085 bad.ack = true;
1086 p = bb->page;
1087
1088 re_clear:
1089 bad.start = s;
1090 bad.len = sectors;
1091
1092 if (badblocks_empty(bb)) {
1093 len = sectors;
1094 cleared++;
1095 goto update_sectors;
1096 }
1097
1098
1099 prev = prev_badblocks(bb, &bad, hint);
1100
1101 /* Start before all badblocks */
1102 if (prev < 0) {
1103 if (overlap_behind(bb, &bad, 0)) {
1104 len = BB_OFFSET(p[0]) - s;
1105 hint = 0;
1106 } else {
1107 len = sectors;
1108 }
1109 /*
1110 * Both situations are to clear non-bad range,
1111 * should be treated as successful
1112 */
1113 cleared++;
1114 goto update_sectors;
1115 }
1116
1117 /* Start after all badblocks */
1118 if ((prev + 1) >= bb->count && !overlap_front(bb, prev, &bad)) {
1119 len = sectors;
1120 cleared++;
1121 goto update_sectors;
1122 }
1123
1124 /* Clear will split a bad record but the table is full */
1125 if (badblocks_full(bb) && (BB_OFFSET(p[prev]) < bad.start) &&
1126 (BB_END(p[prev]) > (bad.start + sectors))) {
1127 len = sectors;
1128 goto update_sectors;
1129 }
1130
1131 if (overlap_front(bb, prev, &bad)) {
1132 if ((BB_OFFSET(p[prev]) < bad.start) &&
1133 (BB_END(p[prev]) > (bad.start + bad.len))) {
1134 /* Splitting */
1135 if ((bb->count + 1) <= MAX_BADBLOCKS) {
1136 len = front_splitting_clear(bb, prev, &bad);
1137 bb->count += 1;
1138 cleared++;
1139 } else {
1140 /* No space to split, give up */
1141 len = sectors;
1142 }
1143 } else {
1144 int deleted = 0;
1145
1146 len = front_clear(bb, prev, &bad, &deleted);
1147 bb->count -= deleted;
1148 cleared++;
1149 hint = prev;
1150 }
1151
1152 goto update_sectors;
1153 }
1154
1155 /* Not front overlap, but behind overlap */
1156 if ((prev + 1) < bb->count && overlap_behind(bb, &bad, prev + 1)) {
1157 len = BB_OFFSET(p[prev + 1]) - bad.start;
1158 hint = prev + 1;
1159 /* Clear non-bad range should be treated as successful */
1160 cleared++;
1161 goto update_sectors;
1162 }
1163
1164 /* Not cover any badblocks range in the table */
1165 len = sectors;
1166 /* Clear non-bad range should be treated as successful */
1167 cleared++;
1168
1169 update_sectors:
1170 s += len;
1171 sectors -= len;
1172
1173 if (sectors > 0)
1174 goto re_clear;
1175
1176 if (cleared) {
1177 badblocks_update_acked(bb);
1178 set_changed(bb);
1179 }
1180
1181 write_sequnlock_irq(&bb->lock);
1182
1183 if (!cleared)
1184 return false;
1185
1186 return true;
1187 }
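
/*
 * Worked example (illustrative numbers, assuming bb->shift == 0): with a
 * single table entry covering sectors 0..63, badblocks_clear(bb, 16, 16)
 * hits the middle of the range. front_splitting_clear() rewrites the entry
 * to 0..15 and inserts a new entry 32..63 right after it, so bb->count grows
 * by one. Clearing 0..7 afterwards would instead shrink the first entry in
 * place to 8..15 via front_clear(), without touching bb->count.
 */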
1188
1189 /* Do the exact work to check bad blocks range from the bad block table */
1190 static int _badblocks_check(struct badblocks *bb, sector_t s, sector_t sectors,
1191 sector_t *first_bad, sector_t *bad_sectors)
1192 {
1193 int prev = -1, hint = -1, set = 0;
1194 struct badblocks_context bad;
1195 int unacked_badblocks = 0;
1196 int acked_badblocks = 0;
1197 u64 *p = bb->page;
1198 int len, rv;
1199
1200 re_check:
1201 bad.start = s;
1202 bad.len = sectors;
1203
1204 if (badblocks_empty(bb)) {
1205 len = sectors;
1206 goto update_sectors;
1207 }
1208
1209 prev = prev_badblocks(bb, &bad, hint);
1210
1211 /* start after all badblocks */
1212 if ((prev >= 0) &&
1213 ((prev + 1) >= bb->count) && !overlap_front(bb, prev, &bad)) {
1214 len = sectors;
1215 goto update_sectors;
1216 }
1217
1218 /* Overlapped with front badblocks record */
1219 if ((prev >= 0) && overlap_front(bb, prev, &bad)) {
1220 if (BB_ACK(p[prev]))
1221 acked_badblocks++;
1222 else
1223 unacked_badblocks++;
1224
1225 if (BB_END(p[prev]) >= (s + sectors))
1226 len = sectors;
1227 else
1228 len = BB_END(p[prev]) - s;
1229
1230 if (set == 0) {
1231 *first_bad = BB_OFFSET(p[prev]);
1232 *bad_sectors = BB_LEN(p[prev]);
1233 set = 1;
1234 }
1235 goto update_sectors;
1236 }
1237
1238 /* Not front overlap, but behind overlap */
1239 if ((prev + 1) < bb->count && overlap_behind(bb, &bad, prev + 1)) {
1240 len = BB_OFFSET(p[prev + 1]) - bad.start;
1241 hint = prev + 1;
1242 goto update_sectors;
1243 }
1244
1245 /* not cover any badblocks range in the table */
1246 len = sectors;
1247
1248 update_sectors:
1249 s += len;
1250 sectors -= len;
1251
1252 if (sectors > 0)
1253 goto re_check;
1254
1255 WARN_ON(sectors < 0);
1256
1257 if (unacked_badblocks > 0)
1258 rv = -1;
1259 else if (acked_badblocks > 0)
1260 rv = 1;
1261 else
1262 rv = 0;
1263
1264 return rv;
1265 }
1266
1267 /**
1268 * badblocks_check() - check a given range for bad sectors
1269 * @bb: the badblocks structure that holds all badblock information
1270 * @s: sector (start) at which to check for badblocks
1271 * @sectors: number of sectors to check for badblocks
1272 * @first_bad: pointer to store location of the first badblock
1273 * @bad_sectors: pointer to store number of badblocks after @first_bad
1274 *
1275 * We can record which blocks on each device are 'bad' and so just
1276 * fail those blocks, or that stripe, rather than the whole device.
1277 * Entries in the bad-block table are 64bits wide. This comprises:
1278 * Length of bad-range, in sectors: 0-511 for lengths 1-512
1279 * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
1280 * A 'shift' can be set so that larger blocks are tracked and
1281 * consequently larger devices can be covered.
1282 * 'Acknowledged' flag - 1 bit. - the most significant bit.
1283 *
1284 * Locking of the bad-block table uses a seqlock so badblocks_check
1285 * might need to retry if it is very unlucky.
1286 * We will sometimes want to check for bad blocks in a bi_end_io function,
1287 * so we use the write_seqlock_irq variant.
1288 *
1289 * When looking for a bad block we specify a range and want to
1290 * know if any block in the range is bad. So we binary-search
1291 * to the last range that starts at-or-before the given endpoint,
1292 * (or "before the sector after the target range")
1293 * then see if it ends after the given start.
1294 *
1295 * Return:
1296 * 0: there are no known bad blocks in the range
1297 * 1: there are known bad blocks which are all acknowledged
1298 * -1: there are bad blocks which have not yet been acknowledged in metadata.
1299 * plus the start/length of the first bad section we overlap.
1300 */
1301 int badblocks_check(struct badblocks *bb, sector_t s, sector_t sectors,
1302 sector_t *first_bad, sector_t *bad_sectors)
1303 {
1304 unsigned int seq;
1305 int rv;
1306
1307 WARN_ON(bb->shift < 0 || sectors == 0);
1308
1309 if (bb->shift > 0) {
1310 /* round the start down, and the end up */
1311 sector_t target = s + sectors;
1312
1313 s = rounddown(s, 1 << bb->shift);
1314 target = roundup(target, 1 << bb->shift);
1315 sectors = target - s;
1316 }
1317
1318 retry:
1319 seq = read_seqbegin(&bb->lock);
1320 rv = _badblocks_check(bb, s, sectors, first_bad, bad_sectors);
1321 if (read_seqretry(&bb->lock, seq))
1322 goto retry;
1323
1324 return rv;
1325 }
1326 EXPORT_SYMBOL_GPL(badblocks_check);
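
/*
 * Usage sketch (illustrative, not taken from an in-tree caller): how a read
 * path might consult badblocks_check() before issuing an I/O. The function
 * name and the truncate-before-first-bad policy are made up for the example.
 * Returns the number of leading sectors that are safe to access.
 */
static sector_t __maybe_unused badblocks_check_example(struct badblocks *bb,
						       sector_t sector,
						       sector_t nr_sectors)
{
	sector_t first_bad, bad_sectors;
	int rv;

	rv = badblocks_check(bb, sector, nr_sectors, &first_bad, &bad_sectors);
	if (rv == 0)
		return nr_sectors;	/* whole range is clean */
	if (rv < 0)
		return 0;		/* unacknowledged bad blocks: fail the I/O */

	/* rv > 0: only acknowledged bad blocks; stop before the first one. */
	if (first_bad > sector)
		return first_bad - sector;
	return 0;
}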
1327
1328 /**
1329 * badblocks_set() - Add a range of bad blocks to the table.
1330 * @bb: the badblocks structure that holds all badblock information
1331 * @s: first sector to mark as bad
1332 * @sectors: number of sectors to mark as bad
1333 * @acknowledged: whether to mark the bad sectors as acknowledged
1334 *
1335 * This might extend the table, or might contract it if two adjacent ranges
1336 * can be merged. We binary-search to find the 'insertion' point, then
1337 * decide how best to handle it.
1338 *
1339 * Return:
1340 * true: success
1341 * false: failed to set badblocks (out of space). Partial setting will be
1342 * treated as failure.
1343 */
1344 bool badblocks_set(struct badblocks *bb, sector_t s, sector_t sectors,
1345 int acknowledged)
1346 {
1347 return _badblocks_set(bb, s, sectors, acknowledged);
1348 }
1349 EXPORT_SYMBOL_GPL(badblocks_set);
1350
1351 /**
1352 * badblocks_clear() - Remove a range of bad blocks from the table.
1353 * @bb: the badblocks structure that holds all badblock information
1354 * @s: first sector to clear as no longer bad
1355 * @sectors: number of sectors to clear
1356 *
1357 * This may involve extending the table if we split a region,
1358 * but it must not fail. So if the table becomes full, we just
1359 * drop the remove request.
1360 *
1361 * Return:
1362 * true: success
1363 * false: failed to clear badblocks
1364 */
1365 bool badblocks_clear(struct badblocks *bb, sector_t s, sector_t sectors)
1366 {
1367 return _badblocks_clear(bb, s, sectors);
1368 }
1369 EXPORT_SYMBOL_GPL(badblocks_clear);
1370
1371 /**
1372 * ack_all_badblocks() - Acknowledge all bad blocks in a list.
1373 * @bb: the badblocks structure that holds all badblock information
1374 *
1375 * This only succeeds if ->changed is clear. It is used by
1376 * in-kernel metadata updates
1377 */
1378 void ack_all_badblocks(struct badblocks *bb)
1379 {
1380 if (bb->page == NULL || bb->changed)
1381 /* no point even trying */
1382 return;
1383 write_seqlock_irq(&bb->lock);
1384
1385 if (bb->changed == 0 && bb->unacked_exist) {
1386 u64 *p = bb->page;
1387 int i;
1388
1389 for (i = 0; i < bb->count ; i++) {
1390 if (!BB_ACK(p[i])) {
1391 sector_t start = BB_OFFSET(p[i]);
1392 int len = BB_LEN(p[i]);
1393
1394 p[i] = BB_MAKE(start, len, 1);
1395 }
1396 }
1397
1398 for (i = 0; i < bb->count ; i++)
1399 while (try_adjacent_combine(bb, i))
1400 ;
1401
1402 bb->unacked_exist = 0;
1403 }
1404 write_sequnlock_irq(&bb->lock);
1405 }
1406 EXPORT_SYMBOL_GPL(ack_all_badblocks);
1407
1408 /**
1409 * badblocks_show() - sysfs access to bad-blocks list
1410 * @bb: the badblocks structure that holds all badblock information
1411 * @page: buffer received from sysfs
1412 * @unack: whether to show only unacknowledged badblocks
1413 *
1414 * Return:
1415 * Length of returned data
1416 */
1417 ssize_t badblocks_show(struct badblocks *bb, char *page, int unack)
1418 {
1419 size_t len;
1420 int i;
1421 u64 *p = bb->page;
1422 unsigned seq;
1423
1424 if (bb->shift < 0)
1425 return 0;
1426
1427 retry:
1428 seq = read_seqbegin(&bb->lock);
1429
1430 len = 0;
1431 i = 0;
1432
1433 while (len < PAGE_SIZE && i < bb->count) {
1434 sector_t s = BB_OFFSET(p[i]);
1435 unsigned int length = BB_LEN(p[i]);
1436 int ack = BB_ACK(p[i]);
1437
1438 i++;
1439
1440 if (unack && ack)
1441 continue;
1442
1443 len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
1444 (unsigned long long)s << bb->shift,
1445 length << bb->shift);
1446 }
1447 if (unack && len == 0)
1448 bb->unacked_exist = 0;
1449
1450 if (read_seqretry(&bb->lock, seq))
1451 goto retry;
1452
1453 return len;
1454 }
1455 EXPORT_SYMBOL_GPL(badblocks_show);
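
/*
 * Example of the output produced by badblocks_show() (illustrative values):
 * one "<start> <length>" pair per line, both scaled by bb->shift, e.g.
 *
 *	2097152 8
 *	4194312 16
 *
 * When 'unack' is set, only not-yet-acknowledged ranges are printed.
 */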
1456
1457 /**
1458 * badblocks_store() - sysfs access to bad-blocks list
1459 * @bb: the badblocks structure that holds all badblock information
1460 * @page: buffer received from sysfs
1461 * @len: length of data received from sysfs
1462 * @unack: whether to store the badblocks as unacknowledged
1463 *
1464 * Return:
1465 * Length of the buffer processed or -ve error.
1466 */
1467 ssize_t badblocks_store(struct badblocks *bb, const char *page, size_t len,
1468 int unack)
1469 {
1470 unsigned long long sector;
1471 int length;
1472 char newline;
1473
1474 switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
1475 case 3:
1476 if (newline != '\n')
1477 return -EINVAL;
1478 fallthrough;
1479 case 2:
1480 if (length <= 0)
1481 return -EINVAL;
1482 break;
1483 default:
1484 return -EINVAL;
1485 }
1486
1487 if (!badblocks_set(bb, sector, length, !unack))
1488 return -ENOSPC;
1489
1490 return len;
1491 }
1492 EXPORT_SYMBOL_GPL(badblocks_store);
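
/*
 * Example of the input accepted by badblocks_store() (illustrative): a
 * "<start> <length>" pair, optionally newline terminated. Writing
 * "2097152 8\n" records 8 sectors starting at sector 2097152; the range is
 * stored as acknowledged unless the sysfs wiring passes 'unack' as set.
 */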
1493
1494 static int __badblocks_init(struct device *dev, struct badblocks *bb,
1495 int enable)
1496 {
1497 bb->dev = dev;
1498 bb->count = 0;
1499 if (enable)
1500 bb->shift = 0;
1501 else
1502 bb->shift = -1;
1503 if (dev)
1504 bb->page = devm_kzalloc(dev, PAGE_SIZE, GFP_KERNEL);
1505 else
1506 bb->page = kzalloc(PAGE_SIZE, GFP_KERNEL);
1507 if (!bb->page) {
1508 bb->shift = -1;
1509 return -ENOMEM;
1510 }
1511 seqlock_init(&bb->lock);
1512
1513 return 0;
1514 }
1515
1516 /**
1517 * badblocks_init() - initialize the badblocks structure
1518 * @bb: the badblocks structure that holds all badblock information
1519 * @enable: whether to enable badblocks accounting
1520 *
1521 * Return:
1522 * 0: success
1523 * -ve errno: on error
1524 */
1525 int badblocks_init(struct badblocks *bb, int enable)
1526 {
1527 return __badblocks_init(NULL, bb, enable);
1528 }
1529 EXPORT_SYMBOL_GPL(badblocks_init);
1530
1531 int devm_init_badblocks(struct device *dev, struct badblocks *bb)
1532 {
1533 if (!bb)
1534 return -EINVAL;
1535 return __badblocks_init(dev, bb, 1);
1536 }
1537 EXPORT_SYMBOL_GPL(devm_init_badblocks);
1538
1539 /**
1540 * badblocks_exit() - free the badblocks structure
1541 * @bb: the badblocks structure that holds all badblock information
1542 */
1543 void badblocks_exit(struct badblocks *bb)
1544 {
1545 if (!bb)
1546 return;
1547 if (bb->dev)
1548 devm_kfree(bb->dev, bb->page);
1549 else
1550 kfree(bb->page);
1551 bb->page = NULL;
1552 }
1553 EXPORT_SYMBOL_GPL(badblocks_exit);
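
/*
 * Lifecycle sketch (illustrative, hypothetical driver context): a driver
 * embeds a struct badblocks in its private data, initializes it at probe
 * time, feeds it from media error reports, and lets badblocks_exit()
 * release the table page. All names below are made up for the example.
 */
struct example_disk_priv {
	struct badblocks bb;
};

static int __maybe_unused example_disk_probe(struct device *dev,
					     struct example_disk_priv *priv)
{
	int ret;

	/* devm variant: the table page is tied to the device's lifetime. */
	ret = devm_init_badblocks(dev, &priv->bb);
	if (ret)
		return ret;

	/* Record an acknowledged bad range reported by the hardware. */
	if (!badblocks_set(&priv->bb, 4096, 8, 1))
		dev_warn(dev, "badblocks table full, range not recorded\n");

	return 0;
}

static void __maybe_unused example_disk_remove(struct example_disk_priv *priv)
{
	/* Safe for both init variants; uses devm_kfree() when bb->dev is set. */
	badblocks_exit(&priv->bb);
}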
1554