Lines Matching +full:no +full:- +full:wp

1 // SPDX-License-Identifier: GPL-2.0
12 #include "rcu-string.h"
13 #include "disk-io.h"
14 #include "block-group.h"
15 #include "dev-replace.h"
16 #include "space-info.h"
24 #define WP_MISSING_DEV ((u64)-1)
26 #define WP_CONVENTIONAL ((u64)-2)
31 * - primary superblock: 0B (zone 0)
32 * - first copy: 512G (zone starting at that offset)
33 * - second copy: 4T (zone starting at that offset)
48 * - BTRFS_SUPER_MIRROR_MAX zones for superblock mirrors
49 * - 3 zones to ensure at least one zone per SYSTEM, META and DATA block group
50 * - 1 zone for tree-log dedicated block group
51 * - 1 zone for relocation
57 * size of 256MiB, and we are expecting ZNS drives to be in the 1-4GiB range.
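A quick sanity check of the fixed mirror locations listed above (a standalone sketch, not kernel code: it assumes 256MiB zones, i.e. a zone size shift of 28, treats the 512G/4T offsets as 2^39 and 2^42, and assumes the zone size is smaller than those offsets):

    #include <stdio.h>

    int main(void)
    {
        const unsigned int zone_size_shift = 28;   /* 256MiB zones */
        const unsigned int first_shift = 39;       /* 512G = 2^39 */
        const unsigned int second_shift = 42;      /* 4T   = 2^42 */
        unsigned long long zone1 = 1ULL << (first_shift - zone_size_shift);
        unsigned long long zone2 = 1ULL << (second_shift - zone_size_shift);

        /* Mirror 0 always lives in zone 0; the copies land on fixed byte offsets. */
        printf("mirror 1 -> zone %llu (512G / 256MiB)\n", zone1);  /* 2048 */
        printf("mirror 2 -> zone %llu (4T / 256MiB)\n", zone2);    /* 16384 */
        return 0;
    }
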
71 return (zone->cond == BLK_ZONE_COND_FULL) || in sb_zone_is_full()
72 (zone->wp + SUPER_INFO_SECTORS > zone->start + zone->capacity); in sb_zone_is_full()
106 * *: Special case, no superblock is written in sb_write_pointer()
115 /* Special case to distinguish no superblock to read */ in sb_write_pointer()
117 return -ENOENT; in sb_write_pointer()
120 struct address_space *mapping = bdev->bd_mapping; in sb_write_pointer()
126 u64 bytenr = ALIGN_DOWN(zone_end, BTRFS_SUPER_INFO_SIZE) - in sb_write_pointer()
148 sector = zones[0].wp; in sb_write_pointer()
150 sector = zones[1].wp; in sb_write_pointer()
152 return -EUCLEAN; in sb_write_pointer()
168 case 1: zone = 1ULL << (BTRFS_SB_LOG_FIRST_SHIFT - shift); break; in sb_zone_number()
169 case 2: zone = 1ULL << (BTRFS_SB_LOG_SECOND_SHIFT - shift); break; in sb_zone_number()
186 return (u64)zone_number << zone_info->zone_size_shift; in zone_start_physical()
190 * Emulate blkdev_report_zones() for a non-zoned device. It slices up the block
197 const sector_t zone_sectors = device->fs_info->zone_size >> SECTOR_SHIFT; in emulate_report_zones()
198 sector_t bdev_size = bdev_nr_sectors(device->bdev); in emulate_report_zones()
206 zones[i].wp = zones[i].start + zone_sectors; in emulate_report_zones()
210 if (zones[i].wp >= bdev_size) { in emulate_report_zones()
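The emulation above simply slices the device into equally sized conventional zones whose write pointer is parked at the zone end, stopping at the zone that covers the device end. A minimal user-space sketch of that slicing (using a simplified stand-in struct rather than the kernel's struct blk_zone; the sizes are made up):

    #include <stdio.h>

    struct fake_zone {                 /* simplified stand-in for struct blk_zone */
        unsigned long long start;      /* sectors */
        unsigned long long len;
        unsigned long long wp;
    };

    int main(void)
    {
        const unsigned long long zone_sectors = (256ULL << 20) >> 9;  /* 256MiB in sectors */
        const unsigned long long bdev_size = 10 * zone_sectors + 123; /* device size, sectors */
        struct fake_zone z;

        for (unsigned long long pos = 0; ; pos += zone_sectors) {
            z.start = pos;
            z.len = zone_sectors;
            z.wp = z.start + z.len;    /* conventional zone: wp sits at the zone end */
            printf("zone @%llu len %llu\n", z.start, z.len);
            if (z.wp >= bdev_size)     /* stop after the zone covering the device end */
                break;
        }
        return 0;
    }
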
222 struct btrfs_zoned_device_info *zinfo = device->zone_info; in btrfs_get_dev_zones()
228 if (!bdev_is_zoned(device->bdev)) { in btrfs_get_dev_zones()
235 if (zinfo->zone_cache) { in btrfs_get_dev_zones()
239 ASSERT(IS_ALIGNED(pos, zinfo->zone_size)); in btrfs_get_dev_zones()
240 zno = pos >> zinfo->zone_size_shift; in btrfs_get_dev_zones()
245 *nr_zones = min_t(u32, *nr_zones, zinfo->nr_zones - zno); in btrfs_get_dev_zones()
250 zone_info = &zinfo->zone_cache[zno + i]; in btrfs_get_dev_zones()
251 if (!zone_info->len) in btrfs_get_dev_zones()
257 memcpy(zones, zinfo->zone_cache + zno, in btrfs_get_dev_zones()
258 sizeof(*zinfo->zone_cache) * *nr_zones); in btrfs_get_dev_zones()
263 ret = blkdev_report_zones(device->bdev, pos >> SECTOR_SHIFT, *nr_zones, in btrfs_get_dev_zones()
266 btrfs_err_in_rcu(device->fs_info, in btrfs_get_dev_zones()
268 pos, rcu_str_deref(device->name), in btrfs_get_dev_zones()
269 device->devid); in btrfs_get_dev_zones()
274 return -EIO; in btrfs_get_dev_zones()
277 if (zinfo->zone_cache) { in btrfs_get_dev_zones()
278 u32 zno = pos >> zinfo->zone_size_shift; in btrfs_get_dev_zones()
280 memcpy(zinfo->zone_cache + zno, zones, in btrfs_get_dev_zones()
281 sizeof(*zinfo->zone_cache) * *nr_zones); in btrfs_get_dev_zones()
291 struct btrfs_root *root = fs_info->dev_root; in calculate_emulated_zone_size()
303 return -ENOMEM; in calculate_emulated_zone_size()
309 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { in calculate_emulated_zone_size()
313 /* No dev extents at all? Not good */ in calculate_emulated_zone_size()
315 return -EUCLEAN; in calculate_emulated_zone_size()
318 leaf = path->nodes[0]; in calculate_emulated_zone_size()
319 dext = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent); in calculate_emulated_zone_size()
320 fs_info->zone_size = btrfs_dev_extent_length(leaf, dext); in calculate_emulated_zone_size()
326 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; in btrfs_get_dev_zone_info_all_devices()
330 /* fs_info->zone_size might not be set yet. Use the incompat flag here. */ in btrfs_get_dev_zone_info_all_devices()
334 mutex_lock(&fs_devices->device_list_mutex); in btrfs_get_dev_zone_info_all_devices()
335 list_for_each_entry(device, &fs_devices->devices, dev_list) { in btrfs_get_dev_zone_info_all_devices()
337 if (!device->bdev) in btrfs_get_dev_zone_info_all_devices()
344 mutex_unlock(&fs_devices->device_list_mutex); in btrfs_get_dev_zone_info_all_devices()
351 struct btrfs_fs_info *fs_info = device->fs_info; in btrfs_get_dev_zone_info()
353 struct block_device *bdev = device->bdev; in btrfs_get_dev_zone_info()
371 if (device->zone_info) in btrfs_get_dev_zone_info()
376 return -ENOMEM; in btrfs_get_dev_zone_info()
378 device->zone_info = zone_info; in btrfs_get_dev_zone_info()
381 if (!fs_info->zone_size) { in btrfs_get_dev_zone_info()
387 ASSERT(fs_info->zone_size); in btrfs_get_dev_zone_info()
388 zone_sectors = fs_info->zone_size >> SECTOR_SHIFT; in btrfs_get_dev_zone_info()
394 zone_info->zone_size = zone_sectors << SECTOR_SHIFT; in btrfs_get_dev_zone_info()
397 if (zone_info->zone_size > BTRFS_MAX_ZONE_SIZE) { in btrfs_get_dev_zone_info()
400 rcu_str_deref(device->name), in btrfs_get_dev_zone_info()
401 zone_info->zone_size, BTRFS_MAX_ZONE_SIZE); in btrfs_get_dev_zone_info()
402 ret = -EINVAL; in btrfs_get_dev_zone_info()
404 } else if (zone_info->zone_size < BTRFS_MIN_ZONE_SIZE) { in btrfs_get_dev_zone_info()
407 rcu_str_deref(device->name), in btrfs_get_dev_zone_info()
408 zone_info->zone_size, BTRFS_MIN_ZONE_SIZE); in btrfs_get_dev_zone_info()
409 ret = -EINVAL; in btrfs_get_dev_zone_info()
414 zone_info->zone_size_shift = ilog2(zone_info->zone_size); in btrfs_get_dev_zone_info()
415 zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors); in btrfs_get_dev_zone_info()
417 zone_info->nr_zones++; in btrfs_get_dev_zone_info()
423 rcu_str_deref(device->name), max_active_zones, in btrfs_get_dev_zone_info()
425 ret = -EINVAL; in btrfs_get_dev_zone_info()
428 zone_info->max_active_zones = max_active_zones; in btrfs_get_dev_zone_info()
430 zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); in btrfs_get_dev_zone_info()
431 if (!zone_info->seq_zones) { in btrfs_get_dev_zone_info()
432 ret = -ENOMEM; in btrfs_get_dev_zone_info()
436 zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); in btrfs_get_dev_zone_info()
437 if (!zone_info->empty_zones) { in btrfs_get_dev_zone_info()
438 ret = -ENOMEM; in btrfs_get_dev_zone_info()
442 zone_info->active_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); in btrfs_get_dev_zone_info()
443 if (!zone_info->active_zones) { in btrfs_get_dev_zone_info()
444 ret = -ENOMEM; in btrfs_get_dev_zone_info()
450 ret = -ENOMEM; in btrfs_get_dev_zone_info()
455 * Enable zone cache only for a zoned device. On a non-zoned device, we in btrfs_get_dev_zone_info()
456 * fill the zone info with emulated CONVENTIONAL zones, so no need to in btrfs_get_dev_zone_info()
459 if (populate_cache && bdev_is_zoned(device->bdev)) { in btrfs_get_dev_zone_info()
460 zone_info->zone_cache = vcalloc(zone_info->nr_zones, in btrfs_get_dev_zone_info()
462 if (!zone_info->zone_cache) { in btrfs_get_dev_zone_info()
463 btrfs_err_in_rcu(device->fs_info, in btrfs_get_dev_zone_info()
465 rcu_str_deref(device->name)); in btrfs_get_dev_zone_info()
466 ret = -ENOMEM; in btrfs_get_dev_zone_info()
482 __set_bit(nreported, zone_info->seq_zones); in btrfs_get_dev_zone_info()
485 __set_bit(nreported, zone_info->empty_zones); in btrfs_get_dev_zone_info()
490 __set_bit(nreported, zone_info->active_zones); in btrfs_get_dev_zone_info()
496 sector = zones[nr_zones - 1].start + zones[nr_zones - 1].len; in btrfs_get_dev_zone_info()
499 if (nreported != zone_info->nr_zones) { in btrfs_get_dev_zone_info()
500 btrfs_err_in_rcu(device->fs_info, in btrfs_get_dev_zone_info()
502 rcu_str_deref(device->name), nreported, in btrfs_get_dev_zone_info()
503 zone_info->nr_zones); in btrfs_get_dev_zone_info()
504 ret = -EIO; in btrfs_get_dev_zone_info()
510 btrfs_err_in_rcu(device->fs_info, in btrfs_get_dev_zone_info()
512 nactive, rcu_str_deref(device->name), in btrfs_get_dev_zone_info()
514 ret = -EIO; in btrfs_get_dev_zone_info()
517 atomic_set(&zone_info->active_zones_left, in btrfs_get_dev_zone_info()
518 max_active_zones - nactive); in btrfs_get_dev_zone_info()
519 set_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags); in btrfs_get_dev_zone_info()
529 sb_zone = sb_zone_number(zone_info->zone_size_shift, i); in btrfs_get_dev_zone_info()
530 if (sb_zone + 1 >= zone_info->nr_zones) in btrfs_get_dev_zone_info()
535 &zone_info->sb_zones[sb_pos], in btrfs_get_dev_zone_info()
541 btrfs_err_in_rcu(device->fs_info, in btrfs_get_dev_zone_info()
543 device->devid, sb_zone); in btrfs_get_dev_zone_info()
544 ret = -EUCLEAN; in btrfs_get_dev_zone_info()
550 * zone to record superblock. No need to validate in that case. in btrfs_get_dev_zone_info()
552 if (zone_info->sb_zones[BTRFS_NR_SB_LOG_ZONES * i].type == in btrfs_get_dev_zone_info()
556 ret = sb_write_pointer(device->bdev, in btrfs_get_dev_zone_info()
557 &zone_info->sb_zones[sb_pos], &sb_wp); in btrfs_get_dev_zone_info()
558 if (ret != -ENOENT && ret) { in btrfs_get_dev_zone_info()
559 btrfs_err_in_rcu(device->fs_info, in btrfs_get_dev_zone_info()
561 device->devid, sb_zone); in btrfs_get_dev_zone_info()
562 ret = -EUCLEAN; in btrfs_get_dev_zone_info()
571 model = "host-managed zoned"; in btrfs_get_dev_zone_info()
580 model, rcu_str_deref(device->name), zone_info->nr_zones, in btrfs_get_dev_zone_info()
581 emulated, zone_info->zone_size); in btrfs_get_dev_zone_info()
593 struct btrfs_zoned_device_info *zone_info = device->zone_info; in btrfs_destroy_dev_zone_info()
598 bitmap_free(zone_info->active_zones); in btrfs_destroy_dev_zone_info()
599 bitmap_free(zone_info->seq_zones); in btrfs_destroy_dev_zone_info()
600 bitmap_free(zone_info->empty_zones); in btrfs_destroy_dev_zone_info()
601 vfree(zone_info->zone_cache); in btrfs_destroy_dev_zone_info()
603 device->zone_info = NULL; in btrfs_destroy_dev_zone_info()
610 zone_info = kmemdup(orig_dev->zone_info, sizeof(*zone_info), GFP_KERNEL); in btrfs_clone_dev_zone_info()
614 zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); in btrfs_clone_dev_zone_info()
615 if (!zone_info->seq_zones) in btrfs_clone_dev_zone_info()
618 bitmap_copy(zone_info->seq_zones, orig_dev->zone_info->seq_zones, in btrfs_clone_dev_zone_info()
619 zone_info->nr_zones); in btrfs_clone_dev_zone_info()
621 zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); in btrfs_clone_dev_zone_info()
622 if (!zone_info->empty_zones) in btrfs_clone_dev_zone_info()
625 bitmap_copy(zone_info->empty_zones, orig_dev->zone_info->empty_zones, in btrfs_clone_dev_zone_info()
626 zone_info->nr_zones); in btrfs_clone_dev_zone_info()
628 zone_info->active_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); in btrfs_clone_dev_zone_info()
629 if (!zone_info->active_zones) in btrfs_clone_dev_zone_info()
632 bitmap_copy(zone_info->active_zones, orig_dev->zone_info->active_zones, in btrfs_clone_dev_zone_info()
633 zone_info->nr_zones); in btrfs_clone_dev_zone_info()
634 zone_info->zone_cache = NULL; in btrfs_clone_dev_zone_info()
639 bitmap_free(zone_info->seq_zones); in btrfs_clone_dev_zone_info()
640 bitmap_free(zone_info->empty_zones); in btrfs_clone_dev_zone_info()
641 bitmap_free(zone_info->active_zones); in btrfs_clone_dev_zone_info()
653 return ret ? ret : -EIO; in btrfs_get_dev_zone()
662 list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { in btrfs_check_for_zoned_device()
663 if (device->bdev && bdev_is_zoned(device->bdev)) { in btrfs_check_for_zoned_device()
666 device->bdev); in btrfs_check_for_zoned_device()
667 return -EINVAL; in btrfs_check_for_zoned_device()
676 struct queue_limits *lim = &fs_info->limits; in btrfs_check_zoned_mode()
682 * Host-Managed devices can't be used without the ZONED flag. With the in btrfs_check_zoned_mode()
690 list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { in btrfs_check_zoned_mode()
691 struct btrfs_zoned_device_info *zone_info = device->zone_info; in btrfs_check_zoned_mode()
693 if (!device->bdev) in btrfs_check_zoned_mode()
697 zone_size = zone_info->zone_size; in btrfs_check_zoned_mode()
698 } else if (zone_info->zone_size != zone_size) { in btrfs_check_zoned_mode()
701 zone_info->zone_size, zone_size); in btrfs_check_zoned_mode()
702 return -EINVAL; in btrfs_check_zoned_mode()
706 * With the zoned emulation, we can have non-zoned device on the in btrfs_check_zoned_mode()
710 if (bdev_is_zoned(device->bdev)) in btrfs_check_zoned_mode()
711 blk_stack_limits(lim, bdev_limits(device->bdev), 0); in btrfs_check_zoned_mode()
729 return -EINVAL; in btrfs_check_zoned_mode()
734 return -EINVAL; in btrfs_check_zoned_mode()
737 fs_info->zone_size = zone_size; in btrfs_check_zoned_mode()
745 fs_info->max_zone_append_size = ALIGN_DOWN( in btrfs_check_zoned_mode()
746 min3((u64)lim->max_zone_append_sectors << SECTOR_SHIFT, in btrfs_check_zoned_mode()
747 (u64)lim->max_sectors << SECTOR_SHIFT, in btrfs_check_zoned_mode()
748 (u64)lim->max_segments << PAGE_SHIFT), in btrfs_check_zoned_mode()
749 fs_info->sectorsize); in btrfs_check_zoned_mode()
750 fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED; in btrfs_check_zoned_mode()
752 fs_info->max_extent_size = min_not_zero(fs_info->max_extent_size, in btrfs_check_zoned_mode()
753 fs_info->max_zone_append_size); in btrfs_check_zoned_mode()
756 * Check mount options here, because we might change fs_info->zoned in btrfs_check_zoned_mode()
757 * from fs_info->zone_size. in btrfs_check_zoned_mode()
759 ret = btrfs_check_mountopts_zoned(fs_info, &fs_info->mount_opt); in btrfs_check_zoned_mode()
779 return -EINVAL; in btrfs_check_mountopts_zoned()
784 return -EINVAL; in btrfs_check_mountopts_zoned()
799 u64 wp; in sb_log_location() local
807 ret = sb_write_pointer(bdev, zones, &wp); in sb_log_location()
808 if (ret != -ENOENT && ret < 0) in sb_log_location()
814 if (wp == zones[0].start << SECTOR_SHIFT) in sb_log_location()
816 else if (wp == zones[1].start << SECTOR_SHIFT) in sb_log_location()
819 if (reset && reset->cond != BLK_ZONE_COND_EMPTY) { in sb_log_location()
826 reset->start, reset->len); in sb_log_location()
831 reset->cond = BLK_ZONE_COND_EMPTY; in sb_log_location()
832 reset->wp = reset->start; in sb_log_location()
834 } else if (ret != -ENOENT) { in sb_log_location()
841 if (wp == zones[0].start << SECTOR_SHIFT) in sb_log_location()
843 else if (wp == zones[1].start << SECTOR_SHIFT) in sb_log_location()
846 wp = ALIGN_DOWN(zone_end << SECTOR_SHIFT, in sb_log_location()
849 wp -= BTRFS_SUPER_INFO_SIZE; in sb_log_location()
852 *bytenr_ret = wp; in sb_log_location()
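For the read case above, the latest superblock copy sits in the last superblock-sized slot before the (possibly wrapped) write pointer. A tiny illustration of that alignment math, assuming the usual 4KiB btrfs superblock size and 512-byte sectors (the zone end is made up):

    #include <stdio.h>

    #define SECTOR_SHIFT      9
    #define SUPER_INFO_SIZE   4096ULL   /* assumed btrfs superblock size */

    int main(void)
    {
        unsigned long long zone_end = 1050000;              /* made-up zone end, in sectors */
        unsigned long long wp = zone_end << SECTOR_SHIFT;   /* convert to bytes */

        wp &= ~(SUPER_INFO_SIZE - 1);   /* ALIGN_DOWN to the superblock size */
        wp -= SUPER_INFO_SIZE;          /* last full superblock slot before the zone end */
        printf("read the latest superblock at byte %llu\n", wp);
        return 0;
    }
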
877 return -EINVAL; in btrfs_sb_log_location_bdev()
884 return -ENOENT; in btrfs_sb_log_location_bdev()
892 return -EIO; in btrfs_sb_log_location_bdev()
900 struct btrfs_zoned_device_info *zinfo = device->zone_info; in btrfs_sb_log_location()
904 * For a zoned filesystem on a non-zoned block device, use the same in btrfs_sb_log_location()
909 if (!bdev_is_zoned(device->bdev)) { in btrfs_sb_log_location()
914 zone_num = sb_zone_number(zinfo->zone_size_shift, mirror); in btrfs_sb_log_location()
915 if (zone_num + 1 >= zinfo->nr_zones) in btrfs_sb_log_location()
916 return -ENOENT; in btrfs_sb_log_location()
918 return sb_log_location(device->bdev, in btrfs_sb_log_location()
919 &zinfo->sb_zones[BTRFS_NR_SB_LOG_ZONES * mirror], in btrfs_sb_log_location()
931 zone_num = sb_zone_number(zinfo->zone_size_shift, mirror); in is_sb_log_zone()
932 if (zone_num + 1 >= zinfo->nr_zones) in is_sb_log_zone()
935 if (!test_bit(zone_num, zinfo->seq_zones)) in is_sb_log_zone()
943 struct btrfs_zoned_device_info *zinfo = device->zone_info; in btrfs_advance_sb_log()
950 zone = &zinfo->sb_zones[BTRFS_NR_SB_LOG_ZONES * mirror]; in btrfs_advance_sb_log()
953 if (zone->cond == BLK_ZONE_COND_FULL) { in btrfs_advance_sb_log()
958 if (zone->cond == BLK_ZONE_COND_EMPTY) in btrfs_advance_sb_log()
959 zone->cond = BLK_ZONE_COND_IMP_OPEN; in btrfs_advance_sb_log()
961 zone->wp += SUPER_INFO_SECTORS; in btrfs_advance_sb_log()
965 * No room left to write new superblock. Since in btrfs_advance_sb_log()
972 if (zone->wp != zone->start + zone->capacity) { in btrfs_advance_sb_log()
977 ret = blkdev_zone_mgmt(device->bdev, in btrfs_advance_sb_log()
978 REQ_OP_ZONE_FINISH, zone->start, in btrfs_advance_sb_log()
979 zone->len); in btrfs_advance_sb_log()
985 zone->wp = zone->start + zone->len; in btrfs_advance_sb_log()
986 zone->cond = BLK_ZONE_COND_FULL; in btrfs_advance_sb_log()
993 return -EIO; in btrfs_advance_sb_log()
1013 return -ENOENT; in btrfs_reset_sb_log_zones()
1037 struct btrfs_zoned_device_info *zinfo = device->zone_info; in btrfs_find_allocatable_zones()
1038 const u8 shift = zinfo->zone_size_shift; in btrfs_find_allocatable_zones()
1045 ASSERT(IS_ALIGNED(hole_start, zinfo->zone_size)); in btrfs_find_allocatable_zones()
1046 ASSERT(IS_ALIGNED(num_bytes, zinfo->zone_size)); in btrfs_find_allocatable_zones()
1052 if (end > zinfo->nr_zones) in btrfs_find_allocatable_zones()
1057 !bitmap_test_range_all_set(zinfo->empty_zones, begin, nzones)) { in btrfs_find_allocatable_zones()
1058 pos += zinfo->zone_size; in btrfs_find_allocatable_zones()
1082 zinfo->zone_size); in btrfs_find_allocatable_zones()
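The allocation scan above walks the hole one zone at a time until it finds nzones consecutive set bits in the empty-zone bitmap. A generic sketch of that "find a run of set bits" loop, using a plain bool array instead of the kernel bitmap helpers (find_empty_run is a hypothetical helper, not a kernel function):

    #include <stdbool.h>
    #include <stdio.h>

    /* Return the index of the first run of @nzones set entries, or -1 if none. */
    static int find_empty_run(const bool *empty, int nr_zones, int nzones)
    {
        for (int begin = 0; begin + nzones <= nr_zones; begin++) {
            bool all_set = true;

            for (int i = 0; i < nzones; i++) {
                if (!empty[begin + i]) {
                    all_set = false;
                    break;
                }
            }
            if (all_set)
                return begin;
        }
        return -1;
    }

    int main(void)
    {
        bool empty[8] = { true, false, true, true, true, false, true, true };

        printf("run of 3 empty zones starts at zone %d\n",
               find_empty_run(empty, 8, 3));   /* prints 2 */
        return 0;
    }
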
1095 struct btrfs_zoned_device_info *zone_info = device->zone_info; in btrfs_dev_set_active_zone()
1096 unsigned int zno = (pos >> zone_info->zone_size_shift); in btrfs_dev_set_active_zone()
1099 if (zone_info->max_active_zones == 0) in btrfs_dev_set_active_zone()
1102 if (!test_bit(zno, zone_info->active_zones)) { in btrfs_dev_set_active_zone()
1104 if (atomic_dec_if_positive(&zone_info->active_zones_left) < 0) in btrfs_dev_set_active_zone()
1106 if (test_and_set_bit(zno, zone_info->active_zones)) { in btrfs_dev_set_active_zone()
1108 atomic_inc(&zone_info->active_zones_left); in btrfs_dev_set_active_zone()
1117 struct btrfs_zoned_device_info *zone_info = device->zone_info; in btrfs_dev_clear_active_zone()
1118 unsigned int zno = (pos >> zone_info->zone_size_shift); in btrfs_dev_clear_active_zone()
1121 if (zone_info->max_active_zones == 0) in btrfs_dev_clear_active_zone()
1124 if (test_and_clear_bit(zno, zone_info->active_zones)) in btrfs_dev_clear_active_zone()
1125 atomic_inc(&zone_info->active_zones_left); in btrfs_dev_clear_active_zone()
1136 ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_RESET, in btrfs_reset_device_zone()
1146 physical += device->zone_info->zone_size; in btrfs_reset_device_zone()
1147 length -= device->zone_info->zone_size; in btrfs_reset_device_zone()
1155 struct btrfs_zoned_device_info *zinfo = device->zone_info; in btrfs_ensure_empty_zones()
1156 const u8 shift = zinfo->zone_size_shift; in btrfs_ensure_empty_zones()
1162 ASSERT(IS_ALIGNED(start, zinfo->zone_size)); in btrfs_ensure_empty_zones()
1163 ASSERT(IS_ALIGNED(size, zinfo->zone_size)); in btrfs_ensure_empty_zones()
1165 if (begin + nbits > zinfo->nr_zones) in btrfs_ensure_empty_zones()
1166 return -ERANGE; in btrfs_ensure_empty_zones()
1169 if (bitmap_test_range_all_zero(zinfo->seq_zones, begin, nbits)) in btrfs_ensure_empty_zones()
1173 if (bitmap_test_range_all_set(zinfo->seq_zones, begin, nbits) && in btrfs_ensure_empty_zones()
1174 bitmap_test_range_all_set(zinfo->empty_zones, begin, nbits)) in btrfs_ensure_empty_zones()
1177 for (pos = start; pos < start + size; pos += zinfo->zone_size) { in btrfs_ensure_empty_zones()
1186 device->fs_info, in btrfs_ensure_empty_zones()
1188 rcu_str_deref(device->name), device->devid, pos >> shift); in btrfs_ensure_empty_zones()
1191 ret = btrfs_reset_device_zone(device, pos, zinfo->zone_size, in btrfs_ensure_empty_zones()
1209 struct btrfs_fs_info *fs_info = cache->fs_info; in calculate_alloc_pointer()
1218 * Avoid tree lookups for a new block group, there's no use for it. in calculate_alloc_pointer()
1221 * Also, we have a lock chain of extent buffer lock -> chunk mutex. in calculate_alloc_pointer()
1234 return -ENOMEM; in calculate_alloc_pointer()
1236 key.objectid = cache->start + cache->length; in calculate_alloc_pointer()
1244 ret = -EUCLEAN; in calculate_alloc_pointer()
1248 ret = btrfs_previous_extent_item(root, path, cache->start); in calculate_alloc_pointer()
1257 btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); in calculate_alloc_pointer()
1262 length = fs_info->nodesize; in calculate_alloc_pointer()
1264 if (!(found_key.objectid >= cache->start && in calculate_alloc_pointer()
1265 found_key.objectid + length <= cache->start + cache->length)) { in calculate_alloc_pointer()
1266 return -EUCLEAN; in calculate_alloc_pointer()
1268 *offset_ret = found_key.objectid + length - cache->start; in calculate_alloc_pointer()
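In other words, the emulated allocation pointer is the end of the last extent item, expressed relative to the block group start. A worked example with made-up numbers (block group at 1GiB, last extent a metadata item ending 16MiB + 16KiB into it):

    #include <stdio.h>

    int main(void)
    {
        unsigned long long cache_start = 1ULL << 30;               /* block group at 1GiB */
        unsigned long long objectid = cache_start + (16ULL << 20); /* last extent's start */
        unsigned long long length = 16ULL << 10;                   /* metadata item: nodesize */

        /* offset of the emulated write pointer inside the block group */
        printf("alloc pointer = %llu bytes\n", objectid + length - cache_start);
        return 0;
    }
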
1282 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; in btrfs_load_zone_info()
1289 info->physical = map->stripes[zone_idx].physical; in btrfs_load_zone_info()
1291 down_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1292 device = map->stripes[zone_idx].dev; in btrfs_load_zone_info()
1294 if (!device->bdev) { in btrfs_load_zone_info()
1295 up_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1296 info->alloc_offset = WP_MISSING_DEV; in btrfs_load_zone_info()
1301 if (!device->zone_info->max_active_zones) in btrfs_load_zone_info()
1304 if (!btrfs_dev_is_sequential(device, info->physical)) { in btrfs_load_zone_info()
1305 up_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1306 info->alloc_offset = WP_CONVENTIONAL; in btrfs_load_zone_info()
1310 /* This zone will be used for allocation, so mark this zone non-empty. */ in btrfs_load_zone_info()
1311 btrfs_dev_clear_zone_empty(device, info->physical); in btrfs_load_zone_info()
1314 if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) in btrfs_load_zone_info()
1315 btrfs_dev_clear_zone_empty(dev_replace->tgtdev, info->physical); in btrfs_load_zone_info()
1321 WARN_ON(!IS_ALIGNED(info->physical, fs_info->zone_size)); in btrfs_load_zone_info()
1323 ret = btrfs_get_dev_zone(device, info->physical, &zone); in btrfs_load_zone_info()
1326 up_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1327 if (ret != -EIO && ret != -EOPNOTSUPP) in btrfs_load_zone_info()
1329 info->alloc_offset = WP_MISSING_DEV; in btrfs_load_zone_info()
1336 zone.start << SECTOR_SHIFT, rcu_str_deref(device->name), in btrfs_load_zone_info()
1337 device->devid); in btrfs_load_zone_info()
1338 up_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1339 return -EIO; in btrfs_load_zone_info()
1342 info->capacity = (zone.capacity << SECTOR_SHIFT); in btrfs_load_zone_info()
1349 (info->physical >> device->zone_info->zone_size_shift), in btrfs_load_zone_info()
1350 rcu_str_deref(device->name), device->devid); in btrfs_load_zone_info()
1351 info->alloc_offset = WP_MISSING_DEV; in btrfs_load_zone_info()
1354 info->alloc_offset = 0; in btrfs_load_zone_info()
1357 info->alloc_offset = info->capacity; in btrfs_load_zone_info()
1361 info->alloc_offset = ((zone.wp - zone.start) << SECTOR_SHIFT); in btrfs_load_zone_info()
1366 up_read(&dev_replace->rwsem); in btrfs_load_zone_info()
1375 if (info->alloc_offset == WP_MISSING_DEV) { in btrfs_load_block_group_single()
1376 btrfs_err(bg->fs_info, in btrfs_load_block_group_single()
1378 info->physical); in btrfs_load_block_group_single()
1379 return -EIO; in btrfs_load_block_group_single()
1382 bg->alloc_offset = info->alloc_offset; in btrfs_load_block_group_single()
1383 bg->zone_capacity = info->capacity; in btrfs_load_block_group_single()
1385 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); in btrfs_load_block_group_single()
1394 struct btrfs_fs_info *fs_info = bg->fs_info; in btrfs_load_block_group_dup()
1396 if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { in btrfs_load_block_group_dup()
1397 btrfs_err(fs_info, "zoned: data DUP profile needs raid-stripe-tree"); in btrfs_load_block_group_dup()
1398 return -EINVAL; in btrfs_load_block_group_dup()
1401 bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity); in btrfs_load_block_group_dup()
1404 btrfs_err(bg->fs_info, in btrfs_load_block_group_dup()
1407 return -EIO; in btrfs_load_block_group_dup()
1410 btrfs_err(bg->fs_info, in btrfs_load_block_group_dup()
1413 return -EIO; in btrfs_load_block_group_dup()
1416 btrfs_err(bg->fs_info, in btrfs_load_block_group_dup()
1418 return -EIO; in btrfs_load_block_group_dup()
1423 return -EIO; in btrfs_load_block_group_dup()
1425 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); in btrfs_load_block_group_dup()
1428 bg->alloc_offset = zone_info[0].alloc_offset; in btrfs_load_block_group_dup()
1437 struct btrfs_fs_info *fs_info = bg->fs_info; in btrfs_load_block_group_raid1()
1440 if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { in btrfs_load_block_group_raid1()
1441 btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", in btrfs_load_block_group_raid1()
1442 btrfs_bg_type_to_raid_name(map->type)); in btrfs_load_block_group_raid1()
1443 return -EINVAL; in btrfs_load_block_group_raid1()
1447 bg->zone_capacity = min_not_zero(zone_info[0].capacity, zone_info[1].capacity); in btrfs_load_block_group_raid1()
1449 for (i = 0; i < map->num_stripes; i++) { in btrfs_load_block_group_raid1()
1458 btrfs_bg_type_to_raid_name(map->type)); in btrfs_load_block_group_raid1()
1459 return -EIO; in btrfs_load_block_group_raid1()
1464 return -EIO; in btrfs_load_block_group_raid1()
1468 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); in btrfs_load_block_group_raid1()
1473 bg->alloc_offset = zone_info[0].alloc_offset; in btrfs_load_block_group_raid1()
1475 bg->alloc_offset = zone_info[i - 1].alloc_offset; in btrfs_load_block_group_raid1()
1485 struct btrfs_fs_info *fs_info = bg->fs_info; in btrfs_load_block_group_raid0()
1487 if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { in btrfs_load_block_group_raid0()
1488 btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", in btrfs_load_block_group_raid0()
1489 btrfs_bg_type_to_raid_name(map->type)); in btrfs_load_block_group_raid0()
1490 return -EINVAL; in btrfs_load_block_group_raid0()
1493 for (int i = 0; i < map->num_stripes; i++) { in btrfs_load_block_group_raid0()
1500 return -EIO; in btrfs_load_block_group_raid0()
1503 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); in btrfs_load_block_group_raid0()
1505 bg->zone_capacity += zone_info[i].capacity; in btrfs_load_block_group_raid0()
1506 bg->alloc_offset += zone_info[i].alloc_offset; in btrfs_load_block_group_raid0()
1517 struct btrfs_fs_info *fs_info = bg->fs_info; in btrfs_load_block_group_raid10()
1519 if ((map->type & BTRFS_BLOCK_GROUP_DATA) && !fs_info->stripe_root) { in btrfs_load_block_group_raid10()
1520 btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", in btrfs_load_block_group_raid10()
1521 btrfs_bg_type_to_raid_name(map->type)); in btrfs_load_block_group_raid10()
1522 return -EINVAL; in btrfs_load_block_group_raid10()
1525 for (int i = 0; i < map->num_stripes; i++) { in btrfs_load_block_group_raid10()
1532 return -EIO; in btrfs_load_block_group_raid10()
1535 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &bg->runtime_flags); in btrfs_load_block_group_raid10()
1538 if ((i % map->sub_stripes) == 0) { in btrfs_load_block_group_raid10()
1539 bg->zone_capacity += zone_info[i].capacity; in btrfs_load_block_group_raid10()
1540 bg->alloc_offset += zone_info[i].alloc_offset; in btrfs_load_block_group_raid10()
1549 struct btrfs_fs_info *fs_info = cache->fs_info; in btrfs_load_block_group_zone_info()
1551 u64 logical = cache->start; in btrfs_load_block_group_zone_info()
1552 u64 length = cache->length; in btrfs_load_block_group_zone_info()
1565 if (!IS_ALIGNED(length, fs_info->zone_size)) { in btrfs_load_block_group_zone_info()
1568 logical, length, fs_info->zone_size); in btrfs_load_block_group_zone_info()
1569 return -EIO; in btrfs_load_block_group_zone_info()
1574 return -EINVAL; in btrfs_load_block_group_zone_info()
1576 cache->physical_map = map; in btrfs_load_block_group_zone_info()
1578 zone_info = kcalloc(map->num_stripes, sizeof(*zone_info), GFP_NOFS); in btrfs_load_block_group_zone_info()
1580 ret = -ENOMEM; in btrfs_load_block_group_zone_info()
1584 active = bitmap_zalloc(map->num_stripes, GFP_NOFS); in btrfs_load_block_group_zone_info()
1586 ret = -ENOMEM; in btrfs_load_block_group_zone_info()
1590 for (i = 0; i < map->num_stripes; i++) { in btrfs_load_block_group_zone_info()
1602 set_bit(BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, &cache->runtime_flags); in btrfs_load_block_group_zone_info()
1606 cache->zone_capacity = cache->length; in btrfs_load_block_group_zone_info()
1611 cache->start); in btrfs_load_block_group_zone_info()
1613 } else if (map->num_stripes == num_conventional) { in btrfs_load_block_group_zone_info()
1614 cache->alloc_offset = last_alloc; in btrfs_load_block_group_zone_info()
1615 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags); in btrfs_load_block_group_zone_info()
1620 profile = map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK; in btrfs_load_block_group_zone_info()
1643 btrfs_bg_type_to_raid_name(map->type)); in btrfs_load_block_group_zone_info()
1644 ret = -EINVAL; in btrfs_load_block_group_zone_info()
1648 if (ret == -EIO && profile != 0 && profile != BTRFS_BLOCK_GROUP_RAID0 && in btrfs_load_block_group_zone_info()
1661 cache->alloc_offset = cache->zone_capacity; in btrfs_load_block_group_zone_info()
1667 if ((map->type & BTRFS_BLOCK_GROUP_DATA) && in btrfs_load_block_group_zone_info()
1668 (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) && in btrfs_load_block_group_zone_info()
1669 !fs_info->stripe_root) { in btrfs_load_block_group_zone_info()
1670 btrfs_err(fs_info, "zoned: data %s needs raid-stripe-tree", in btrfs_load_block_group_zone_info()
1671 btrfs_bg_type_to_raid_name(map->type)); in btrfs_load_block_group_zone_info()
1672 return -EINVAL; in btrfs_load_block_group_zone_info()
1675 if (cache->alloc_offset > cache->zone_capacity) { in btrfs_load_block_group_zone_info()
1678 cache->alloc_offset, cache->zone_capacity, in btrfs_load_block_group_zone_info()
1679 cache->start); in btrfs_load_block_group_zone_info()
1680 ret = -EIO; in btrfs_load_block_group_zone_info()
1684 if (!ret && num_conventional && last_alloc > cache->alloc_offset) { in btrfs_load_block_group_zone_info()
1687 logical, last_alloc, cache->alloc_offset); in btrfs_load_block_group_zone_info()
1688 ret = -EIO; in btrfs_load_block_group_zone_info()
1692 cache->meta_write_pointer = cache->alloc_offset + cache->start; in btrfs_load_block_group_zone_info()
1693 if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &cache->runtime_flags)) { in btrfs_load_block_group_zone_info()
1695 spin_lock(&fs_info->zone_active_bgs_lock); in btrfs_load_block_group_zone_info()
1696 list_add_tail(&cache->active_bg_list, in btrfs_load_block_group_zone_info()
1697 &fs_info->zone_active_bgs); in btrfs_load_block_group_zone_info()
1698 spin_unlock(&fs_info->zone_active_bgs_lock); in btrfs_load_block_group_zone_info()
1701 btrfs_free_chunk_map(cache->physical_map); in btrfs_load_block_group_zone_info()
1702 cache->physical_map = NULL; in btrfs_load_block_group_zone_info()
1714 if (!btrfs_is_zoned(cache->fs_info)) in btrfs_calc_zone_unusable()
1717 WARN_ON(cache->bytes_super != 0); in btrfs_calc_zone_unusable()
1718 unusable = (cache->alloc_offset - cache->used) + in btrfs_calc_zone_unusable()
1719 (cache->length - cache->zone_capacity); in btrfs_calc_zone_unusable()
1720 free = cache->zone_capacity - cache->alloc_offset; in btrfs_calc_zone_unusable()
1722 /* We only need ->free_space in ALLOC_SEQ block groups */ in btrfs_calc_zone_unusable()
1723 cache->cached = BTRFS_CACHE_FINISHED; in btrfs_calc_zone_unusable()
1724 cache->free_space_ctl->free_space = free; in btrfs_calc_zone_unusable()
1725 cache->zone_unusable = unusable; in btrfs_calc_zone_unusable()
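A worked example of the accounting above (made-up numbers: a 256MiB block group with 200MiB zone capacity, 100MiB allocated so far, of which 80MiB is still referenced):

    #include <stdio.h>

    int main(void)
    {
        const unsigned long long MiB = 1 << 20;
        unsigned long long length = 256 * MiB, zone_capacity = 200 * MiB;
        unsigned long long alloc_offset = 100 * MiB, used = 80 * MiB;

        unsigned long long unusable = (alloc_offset - used) + (length - zone_capacity);
        unsigned long long free_space = zone_capacity - alloc_offset;

        printf("unusable = %llu MiB, free = %llu MiB\n",
               unusable / MiB, free_space / MiB);   /* 76 MiB, 100 MiB */
        return 0;
    }
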
1730 u64 start = (bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT); in btrfs_use_zone_append()
1731 struct btrfs_inode *inode = bbio->inode; in btrfs_use_zone_append()
1732 struct btrfs_fs_info *fs_info = bbio->fs_info; in btrfs_use_zone_append()
1742 if (btrfs_op(&bbio->bio) != BTRFS_MAP_WRITE) in btrfs_use_zone_append()
1748 * Furthermore we have set aside a dedicated block group from which only the in btrfs_use_zone_append()
1753 if (btrfs_is_data_reloc_root(inode->root)) in btrfs_use_zone_append()
1761 ret = !!test_bit(BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, &cache->runtime_flags); in btrfs_use_zone_append()
1769 const u64 physical = bbio->bio.bi_iter.bi_sector << SECTOR_SHIFT; in btrfs_record_physical_zoned()
1770 struct btrfs_ordered_sum *sum = bbio->sums; in btrfs_record_physical_zoned()
1772 if (physical < bbio->orig_physical) in btrfs_record_physical_zoned()
1773 sum->logical -= bbio->orig_physical - physical; in btrfs_record_physical_zoned()
1775 sum->logical += physical - bbio->orig_physical; in btrfs_record_physical_zoned()
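The adjustment above keeps the checksum's logical address in sync with wherever the zone-append write actually landed: the logical moves by exactly the same delta as the physical. A toy example with made-up offsets:

    #include <stdio.h>

    int main(void)
    {
        unsigned long long orig_physical = 1048576;   /* where the bio was originally mapped */
        unsigned long long physical = 1114112;        /* where zone append landed (+64KiB) */
        unsigned long long logical = 268435456;       /* checksum's original logical address */

        if (physical < orig_physical)
            logical -= orig_physical - physical;
        else
            logical += physical - orig_physical;

        printf("adjusted logical = %llu\n", logical); /* original + 64KiB */
        return 0;
    }
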
1781 struct extent_map_tree *em_tree = &ordered->inode->extent_tree; in btrfs_rewrite_logical_zoned()
1784 ordered->disk_bytenr = logical; in btrfs_rewrite_logical_zoned()
1786 write_lock(&em_tree->lock); in btrfs_rewrite_logical_zoned()
1787 em = search_extent_mapping(em_tree, ordered->file_offset, in btrfs_rewrite_logical_zoned()
1788 ordered->num_bytes); in btrfs_rewrite_logical_zoned()
1790 ASSERT(em->offset == 0); in btrfs_rewrite_logical_zoned()
1791 em->disk_bytenr = logical; in btrfs_rewrite_logical_zoned()
1793 write_unlock(&em_tree->lock); in btrfs_rewrite_logical_zoned()
1801 if (!test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags) && in btrfs_zoned_split_ordered()
1802 split_extent_map(ordered->inode, ordered->file_offset, in btrfs_zoned_split_ordered()
1803 ordered->num_bytes, len, logical)) in btrfs_zoned_split_ordered()
1809 new->disk_bytenr = logical; in btrfs_zoned_split_ordered()
1816 struct btrfs_inode *inode = ordered->inode; in btrfs_finish_ordered_zoned()
1817 struct btrfs_fs_info *fs_info = inode->root->fs_info; in btrfs_finish_ordered_zoned()
1822 * A write to a pre-allocated region is for data relocation, and so in btrfs_finish_ordered_zoned()
1823 * it should use a WRITE operation. No split/rewrite is necessary. in btrfs_finish_ordered_zoned()
1825 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) in btrfs_finish_ordered_zoned()
1828 ASSERT(!list_empty(&ordered->list)); in btrfs_finish_ordered_zoned()
1829 /* The ordered->list can be empty in the above pre-alloc case. */ in btrfs_finish_ordered_zoned()
1830 sum = list_first_entry(&ordered->list, struct btrfs_ordered_sum, list); in btrfs_finish_ordered_zoned()
1831 logical = sum->logical; in btrfs_finish_ordered_zoned()
1832 len = sum->len; in btrfs_finish_ordered_zoned()
1834 while (len < ordered->disk_num_bytes) { in btrfs_finish_ordered_zoned()
1836 if (sum->logical == logical + len) { in btrfs_finish_ordered_zoned()
1837 len += sum->len; in btrfs_finish_ordered_zoned()
1841 set_bit(BTRFS_ORDERED_IOERR, &ordered->flags); in btrfs_finish_ordered_zoned()
1845 logical = sum->logical; in btrfs_finish_ordered_zoned()
1846 len = sum->len; in btrfs_finish_ordered_zoned()
1849 if (ordered->disk_bytenr != logical) in btrfs_finish_ordered_zoned()
1859 if ((inode->flags & BTRFS_INODE_NODATASUM) || in btrfs_finish_ordered_zoned()
1860 test_bit(BTRFS_FS_STATE_NO_DATA_CSUMS, &fs_info->fs_state)) { in btrfs_finish_ordered_zoned()
1861 while ((sum = list_first_entry_or_null(&ordered->list, in btrfs_finish_ordered_zoned()
1863 list_del(&sum->list); in btrfs_finish_ordered_zoned()
1872 const struct writeback_control *wbc = ctx->wbc; in check_bg_is_active()
1873 struct btrfs_block_group *block_group = ctx->zoned_bg; in check_bg_is_active()
1874 struct btrfs_fs_info *fs_info = block_group->fs_info; in check_bg_is_active()
1876 if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) in check_bg_is_active()
1879 if (fs_info->treelog_bg == block_group->start) { in check_bg_is_active()
1889 /* zoned_meta_io_lock protects fs_info->active_{meta,system}_bg. */ in check_bg_is_active()
1890 lockdep_assert_held(&fs_info->zoned_meta_io_lock); in check_bg_is_active()
1897 if (tgt->meta_write_pointer < tgt->start + tgt->alloc_offset) { in check_bg_is_active()
1898 if (wbc->sync_mode == WB_SYNC_NONE || in check_bg_is_active()
1899 (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync)) in check_bg_is_active()
1926 * Check if @ctx->eb is aligned to the write pointer.
1929 * 0: @ctx->eb is at the write pointer. You can write it.
1930 * -EAGAIN: There is a hole. The caller should handle the case.
1931 * -EBUSY: There is a hole, but the caller can just bail out.
1936 const struct writeback_control *wbc = ctx->wbc; in btrfs_check_meta_write_pointer()
1937 const struct extent_buffer *eb = ctx->eb; in btrfs_check_meta_write_pointer()
1938 struct btrfs_block_group *block_group = ctx->zoned_bg; in btrfs_check_meta_write_pointer()
1944 if (block_group->start > eb->start || in btrfs_check_meta_write_pointer()
1945 block_group->start + block_group->length <= eb->start) { in btrfs_check_meta_write_pointer()
1948 ctx->zoned_bg = NULL; in btrfs_check_meta_write_pointer()
1953 block_group = btrfs_lookup_block_group(fs_info, eb->start); in btrfs_check_meta_write_pointer()
1956 ctx->zoned_bg = block_group; in btrfs_check_meta_write_pointer()
1959 if (block_group->meta_write_pointer == eb->start) { in btrfs_check_meta_write_pointer()
1962 if (!test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags)) in btrfs_check_meta_write_pointer()
1965 if (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM) in btrfs_check_meta_write_pointer()
1966 tgt = &fs_info->active_system_bg; in btrfs_check_meta_write_pointer()
1968 tgt = &fs_info->active_meta_bg; in btrfs_check_meta_write_pointer()
1974 * Since we may release fs_info->zoned_meta_io_lock, someone can already in btrfs_check_meta_write_pointer()
1977 if (block_group->meta_write_pointer > eb->start) in btrfs_check_meta_write_pointer()
1978 return -EBUSY; in btrfs_check_meta_write_pointer()
1981 if (wbc->sync_mode == WB_SYNC_ALL && !wbc->for_sync) in btrfs_check_meta_write_pointer()
1982 return -EAGAIN; in btrfs_check_meta_write_pointer()
1983 return -EBUSY; in btrfs_check_meta_write_pointer()
1989 return -EOPNOTSUPP; in btrfs_zoned_issue_zeroout()
1991 return blkdev_issue_zeroout(device->bdev, physical >> SECTOR_SHIFT, in btrfs_zoned_issue_zeroout()
2007 ret = -EIO; in read_zone_info()
2011 if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) { in read_zone_info()
2012 ret = -EINVAL; in read_zone_info()
2017 nmirrors = (int)bioc->num_stripes; in read_zone_info()
2019 u64 physical = bioc->stripes[i].physical; in read_zone_info()
2020 struct btrfs_device *dev = bioc->stripes[i].dev; in read_zone_info()
2023 if (!dev->bdev) in read_zone_info()
2028 if (ret == -EIO || ret == -EOPNOTSUPP) in read_zone_info()
2040 * filling zeros between @physical_pos and the write pointer of the dev-replace
2046 struct btrfs_fs_info *fs_info = tgt_dev->fs_info; in btrfs_sync_zone_write_pointer()
2049 u64 wp; in btrfs_sync_zone_write_pointer() local
2059 wp = physical_start + ((zone.wp - zone.start) << SECTOR_SHIFT); in btrfs_sync_zone_write_pointer()
2061 if (physical_pos == wp) in btrfs_sync_zone_write_pointer()
2064 if (physical_pos > wp) in btrfs_sync_zone_write_pointer()
2065 return -EUCLEAN; in btrfs_sync_zone_write_pointer()
2067 length = wp - physical_pos; in btrfs_sync_zone_write_pointer()
2080 struct btrfs_fs_info *fs_info = block_group->fs_info; in btrfs_zone_activate()
2084 const bool is_data = (block_group->flags & BTRFS_BLOCK_GROUP_DATA); in btrfs_zone_activate()
2088 if (!btrfs_is_zoned(block_group->fs_info)) in btrfs_zone_activate()
2091 map = block_group->physical_map; in btrfs_zone_activate()
2093 spin_lock(&fs_info->zone_active_bgs_lock); in btrfs_zone_activate()
2094 spin_lock(&block_group->lock); in btrfs_zone_activate()
2095 if (test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) { in btrfs_zone_activate()
2100 /* No space left */ in btrfs_zone_activate()
2106 for (i = 0; i < map->num_stripes; i++) { in btrfs_zone_activate()
2110 device = map->stripes[i].dev; in btrfs_zone_activate()
2111 physical = map->stripes[i].physical; in btrfs_zone_activate()
2112 zinfo = device->zone_info; in btrfs_zone_activate()
2114 if (!device->bdev) in btrfs_zone_activate()
2117 if (zinfo->max_active_zones == 0) in btrfs_zone_activate()
2121 reserved = zinfo->reserved_active_zones; in btrfs_zone_activate()
2126 if (atomic_read(&zinfo->active_zones_left) <= reserved) { in btrfs_zone_activate()
2137 zinfo->reserved_active_zones--; in btrfs_zone_activate()
2141 set_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags); in btrfs_zone_activate()
2142 spin_unlock(&block_group->lock); in btrfs_zone_activate()
2146 list_add_tail(&block_group->active_bg_list, &fs_info->zone_active_bgs); in btrfs_zone_activate()
2147 spin_unlock(&fs_info->zone_active_bgs_lock); in btrfs_zone_activate()
2152 spin_unlock(&block_group->lock); in btrfs_zone_activate()
2153 spin_unlock(&fs_info->zone_active_bgs_lock); in btrfs_zone_activate()
2159 struct btrfs_fs_info *fs_info = block_group->fs_info; in wait_eb_writebacks()
2160 const u64 end = block_group->start + block_group->length; in wait_eb_writebacks()
2166 radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, in wait_eb_writebacks()
2167 block_group->start >> fs_info->sectorsize_bits) { in wait_eb_writebacks()
2176 if (eb->start < block_group->start) in wait_eb_writebacks()
2178 if (eb->start >= end) in wait_eb_writebacks()
2191 struct btrfs_fs_info *fs_info = block_group->fs_info; in do_zone_finish()
2193 const bool is_metadata = (block_group->flags & in do_zone_finish()
2195 struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; in do_zone_finish()
2199 spin_lock(&block_group->lock); in do_zone_finish()
2200 if (!test_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags)) { in do_zone_finish()
2201 spin_unlock(&block_group->lock); in do_zone_finish()
2207 block_group->start + block_group->alloc_offset > block_group->meta_write_pointer) { in do_zone_finish()
2208 spin_unlock(&block_group->lock); in do_zone_finish()
2209 return -EAGAIN; in do_zone_finish()
2213 * If we are sure that the block group is full (= no more room left for in do_zone_finish()
2217 * and block_group->meta_write_pointer for metadata. in do_zone_finish()
2220 if (test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) { in do_zone_finish()
2221 spin_unlock(&block_group->lock); in do_zone_finish()
2222 return -EAGAIN; in do_zone_finish()
2224 spin_unlock(&block_group->lock); in do_zone_finish()
2232 /* No need to wait for NOCOW writers. Zoned mode does not allow that */ in do_zone_finish()
2238 spin_lock(&block_group->lock); in do_zone_finish()
2245 &block_group->runtime_flags)) { in do_zone_finish()
2246 spin_unlock(&block_group->lock); in do_zone_finish()
2251 if (block_group->reserved || in do_zone_finish()
2253 &block_group->runtime_flags)) { in do_zone_finish()
2254 spin_unlock(&block_group->lock); in do_zone_finish()
2256 return -EAGAIN; in do_zone_finish()
2260 clear_bit(BLOCK_GROUP_FLAG_ZONE_IS_ACTIVE, &block_group->runtime_flags); in do_zone_finish()
2261 block_group->alloc_offset = block_group->zone_capacity; in do_zone_finish()
2262 if (block_group->flags & (BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_SYSTEM)) in do_zone_finish()
2263 block_group->meta_write_pointer = block_group->start + in do_zone_finish()
2264 block_group->zone_capacity; in do_zone_finish()
2265 block_group->free_space_ctl->free_space = 0; in do_zone_finish()
2268 spin_unlock(&block_group->lock); in do_zone_finish()
2270 down_read(&dev_replace->rwsem); in do_zone_finish()
2271 map = block_group->physical_map; in do_zone_finish()
2272 for (i = 0; i < map->num_stripes; i++) { in do_zone_finish()
2273 struct btrfs_device *device = map->stripes[i].dev; in do_zone_finish()
2274 const u64 physical = map->stripes[i].physical; in do_zone_finish()
2275 struct btrfs_zoned_device_info *zinfo = device->zone_info; in do_zone_finish()
2278 if (!device->bdev) in do_zone_finish()
2281 if (zinfo->max_active_zones == 0) in do_zone_finish()
2285 ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH, in do_zone_finish()
2287 zinfo->zone_size >> SECTOR_SHIFT); in do_zone_finish()
2291 up_read(&dev_replace->rwsem); in do_zone_finish()
2295 if (!(block_group->flags & BTRFS_BLOCK_GROUP_DATA)) in do_zone_finish()
2296 zinfo->reserved_active_zones++; in do_zone_finish()
2299 up_read(&dev_replace->rwsem); in do_zone_finish()
2304 spin_lock(&fs_info->zone_active_bgs_lock); in do_zone_finish()
2305 ASSERT(!list_empty(&block_group->active_bg_list)); in do_zone_finish()
2306 list_del_init(&block_group->active_bg_list); in do_zone_finish()
2307 spin_unlock(&fs_info->zone_active_bgs_lock); in do_zone_finish()
2312 clear_and_wake_up_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags); in do_zone_finish()
2319 if (!btrfs_is_zoned(block_group->fs_info)) in btrfs_zone_finish()
2327 struct btrfs_fs_info *fs_info = fs_devices->fs_info; in btrfs_can_activate_zone()
2335 mutex_lock(&fs_info->chunk_mutex); in btrfs_can_activate_zone()
2336 spin_lock(&fs_info->zone_active_bgs_lock); in btrfs_can_activate_zone()
2337 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { in btrfs_can_activate_zone()
2338 struct btrfs_zoned_device_info *zinfo = device->zone_info; in btrfs_can_activate_zone()
2341 if (!device->bdev) in btrfs_can_activate_zone()
2344 if (!zinfo->max_active_zones) { in btrfs_can_activate_zone()
2350 reserved = zinfo->reserved_active_zones; in btrfs_can_activate_zone()
2354 ret = (atomic_read(&zinfo->active_zones_left) >= (1 + reserved)); in btrfs_can_activate_zone()
2357 ret = (atomic_read(&zinfo->active_zones_left) >= (2 + reserved)); in btrfs_can_activate_zone()
2363 spin_unlock(&fs_info->zone_active_bgs_lock); in btrfs_can_activate_zone()
2364 mutex_unlock(&fs_info->chunk_mutex); in btrfs_can_activate_zone()
2367 set_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags); in btrfs_can_activate_zone()
2383 /* No MIXED_BG on zoned btrfs. */ in btrfs_zone_finish_endio()
2384 if (block_group->flags & BTRFS_BLOCK_GROUP_DATA) in btrfs_zone_finish_endio()
2385 min_alloc_bytes = fs_info->sectorsize; in btrfs_zone_finish_endio()
2387 min_alloc_bytes = fs_info->nodesize; in btrfs_zone_finish_endio()
2391 block_group->start + block_group->zone_capacity) in btrfs_zone_finish_endio()
2405 wait_on_extent_buffer_writeback(bg->last_eb); in btrfs_zone_finish_endio_workfn()
2406 free_extent_buffer(bg->last_eb); in btrfs_zone_finish_endio_workfn()
2407 btrfs_zone_finish_endio(bg->fs_info, bg->start, bg->length); in btrfs_zone_finish_endio_workfn()
2414 if (!test_bit(BLOCK_GROUP_FLAG_SEQUENTIAL_ZONE, &bg->runtime_flags) || in btrfs_schedule_zone_finish_bg()
2415 eb->start + eb->len * 2 <= bg->start + bg->zone_capacity) in btrfs_schedule_zone_finish_bg()
2418 if (WARN_ON(bg->zone_finish_work.func == btrfs_zone_finish_endio_workfn)) { in btrfs_schedule_zone_finish_bg()
2419 btrfs_err(bg->fs_info, "double scheduling of bg %llu zone finishing", in btrfs_schedule_zone_finish_bg()
2420 bg->start); in btrfs_schedule_zone_finish_bg()
2426 atomic_inc(&eb->refs); in btrfs_schedule_zone_finish_bg()
2427 bg->last_eb = eb; in btrfs_schedule_zone_finish_bg()
2428 INIT_WORK(&bg->zone_finish_work, btrfs_zone_finish_endio_workfn); in btrfs_schedule_zone_finish_bg()
2429 queue_work(system_unbound_wq, &bg->zone_finish_work); in btrfs_schedule_zone_finish_bg()
2434 struct btrfs_fs_info *fs_info = bg->fs_info; in btrfs_clear_data_reloc_bg()
2436 spin_lock(&fs_info->relocation_bg_lock); in btrfs_clear_data_reloc_bg()
2437 if (fs_info->data_reloc_bg == bg->start) in btrfs_clear_data_reloc_bg()
2438 fs_info->data_reloc_bg = 0; in btrfs_clear_data_reloc_bg()
2439 spin_unlock(&fs_info->relocation_bg_lock); in btrfs_clear_data_reloc_bg()
2444 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; in btrfs_free_zone_cache()
2450 mutex_lock(&fs_devices->device_list_mutex); in btrfs_free_zone_cache()
2451 list_for_each_entry(device, &fs_devices->devices, dev_list) { in btrfs_free_zone_cache()
2452 if (device->zone_info) { in btrfs_free_zone_cache()
2453 vfree(device->zone_info->zone_cache); in btrfs_free_zone_cache()
2454 device->zone_info->zone_cache = NULL; in btrfs_free_zone_cache()
2457 mutex_unlock(&fs_devices->device_list_mutex); in btrfs_free_zone_cache()
2462 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; in btrfs_zoned_should_reclaim()
2470 if (fs_info->bg_reclaim_threshold == 0) in btrfs_zoned_should_reclaim()
2473 mutex_lock(&fs_devices->device_list_mutex); in btrfs_zoned_should_reclaim()
2474 list_for_each_entry(device, &fs_devices->devices, dev_list) { in btrfs_zoned_should_reclaim()
2475 if (!device->bdev) in btrfs_zoned_should_reclaim()
2478 total += device->disk_total_bytes; in btrfs_zoned_should_reclaim()
2479 used += device->bytes_used; in btrfs_zoned_should_reclaim()
2481 mutex_unlock(&fs_devices->device_list_mutex); in btrfs_zoned_should_reclaim()
2484 return factor >= fs_info->bg_reclaim_threshold; in btrfs_zoned_should_reclaim()
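The sums above feed a simple percentage check: assuming factor is the used/total percentage (which is how these sums are consumed), the decision boils down to the following arithmetic (numbers made up):

    #include <stdio.h>

    int main(void)
    {
        unsigned long long total = 0, used = 0;
        const unsigned long long disk_total[2] = { 100ULL << 30, 100ULL << 30 };
        const unsigned long long bytes_used[2] = {  80ULL << 30,  70ULL << 30 };
        unsigned int bg_reclaim_threshold = 75;          /* percent */

        for (int i = 0; i < 2; i++) {
            total += disk_total[i];
            used += bytes_used[i];
        }

        unsigned long long factor = used * 100 / total;  /* 150G / 200G -> 75% */
        printf("reclaim? %s\n", factor >= bg_reclaim_threshold ? "yes" : "no");
        return 0;
    }
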
2497 ASSERT(block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)); in btrfs_zoned_release_data_reloc_bg()
2499 spin_lock(&block_group->lock); in btrfs_zoned_release_data_reloc_bg()
2500 if (!test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) in btrfs_zoned_release_data_reloc_bg()
2504 if (block_group->start + block_group->alloc_offset == logical + length) { in btrfs_zoned_release_data_reloc_bg()
2510 &block_group->runtime_flags); in btrfs_zoned_release_data_reloc_bg()
2514 spin_unlock(&block_group->lock); in btrfs_zoned_release_data_reloc_bg()
2525 spin_lock(&fs_info->zone_active_bgs_lock); in btrfs_zone_finish_one_bg()
2526 list_for_each_entry(block_group, &fs_info->zone_active_bgs, in btrfs_zone_finish_one_bg()
2530 spin_lock(&block_group->lock); in btrfs_zone_finish_one_bg()
2531 if (block_group->reserved || block_group->alloc_offset == 0 || in btrfs_zone_finish_one_bg()
2532 (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM) || in btrfs_zone_finish_one_bg()
2533 test_bit(BLOCK_GROUP_FLAG_ZONED_DATA_RELOC, &block_group->runtime_flags)) { in btrfs_zone_finish_one_bg()
2534 spin_unlock(&block_group->lock); in btrfs_zone_finish_one_bg()
2538 avail = block_group->zone_capacity - block_group->alloc_offset; in btrfs_zone_finish_one_bg()
2546 spin_unlock(&block_group->lock); in btrfs_zone_finish_one_bg()
2548 spin_unlock(&fs_info->zone_active_bgs_lock); in btrfs_zone_finish_one_bg()
2566 if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA)) in btrfs_zoned_activate_one_bg()
2573 down_read(&space_info->groups_sem); in btrfs_zoned_activate_one_bg()
2575 list_for_each_entry(bg, &space_info->block_groups[index], in btrfs_zoned_activate_one_bg()
2577 if (!spin_trylock(&bg->lock)) in btrfs_zoned_activate_one_bg()
2581 &bg->runtime_flags)) { in btrfs_zoned_activate_one_bg()
2582 spin_unlock(&bg->lock); in btrfs_zoned_activate_one_bg()
2585 spin_unlock(&bg->lock); in btrfs_zoned_activate_one_bg()
2588 up_read(&space_info->groups_sem); in btrfs_zoned_activate_one_bg()
2595 up_read(&space_info->groups_sem); in btrfs_zoned_activate_one_bg()
2611 * Reserve zones for one metadata block group, one tree-log block group, and one
2616 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; in btrfs_check_active_zone_reservation()
2619 /* Reserve zones for normal SINGLE metadata and tree-log block group. */ in btrfs_check_active_zone_reservation()
2624 if (!test_bit(BTRFS_FS_ACTIVE_ZONE_TRACKING, &fs_info->flags)) in btrfs_check_active_zone_reservation()
2628 * This function is called from the mount context. So, there is no in btrfs_check_active_zone_reservation()
2629 * parallel process touching the bits. No need for read_seqretry(). in btrfs_check_active_zone_reservation()
2631 if (fs_info->avail_metadata_alloc_bits & BTRFS_BLOCK_GROUP_DUP) in btrfs_check_active_zone_reservation()
2633 if (fs_info->avail_system_alloc_bits & BTRFS_BLOCK_GROUP_DUP) in btrfs_check_active_zone_reservation()
2637 mutex_lock(&fs_devices->device_list_mutex); in btrfs_check_active_zone_reservation()
2638 list_for_each_entry(device, &fs_devices->devices, dev_list) { in btrfs_check_active_zone_reservation()
2639 if (!device->bdev) in btrfs_check_active_zone_reservation()
2642 device->zone_info->reserved_active_zones = in btrfs_check_active_zone_reservation()
2645 mutex_unlock(&fs_devices->device_list_mutex); in btrfs_check_active_zone_reservation()
2648 spin_lock(&fs_info->zone_active_bgs_lock); in btrfs_check_active_zone_reservation()
2649 list_for_each_entry(block_group, &fs_info->zone_active_bgs, active_bg_list) { in btrfs_check_active_zone_reservation()
2650 struct btrfs_chunk_map *map = block_group->physical_map; in btrfs_check_active_zone_reservation()
2652 if (!(block_group->flags & in btrfs_check_active_zone_reservation()
2656 for (int i = 0; i < map->num_stripes; i++) in btrfs_check_active_zone_reservation()
2657 map->stripes[i].dev->zone_info->reserved_active_zones--; in btrfs_check_active_zone_reservation()
2659 spin_unlock(&fs_info->zone_active_bgs_lock); in btrfs_check_active_zone_reservation()
2663 * Reset the zones of unused block groups from @space_info->bytes_zone_unusable.
2679 struct btrfs_fs_info *fs_info = space_info->fs_info; in btrfs_reset_unused_block_groups()
2680 const sector_t zone_size_sectors = fs_info->zone_size >> SECTOR_SHIFT; in btrfs_reset_unused_block_groups()
2699 spin_lock(&fs_info->unused_bgs_lock); in btrfs_reset_unused_block_groups()
2700 list_for_each_entry(bg, &fs_info->unused_bgs, bg_list) { in btrfs_reset_unused_block_groups()
2701 if ((bg->flags & BTRFS_BLOCK_GROUP_TYPE_MASK) != space_info->flags) in btrfs_reset_unused_block_groups()
2707 * &bg->lock -> &fs_info->unused_bgs_lock. We skip a in btrfs_reset_unused_block_groups()
2710 if (!spin_trylock(&bg->lock)) in btrfs_reset_unused_block_groups()
2712 if (btrfs_is_block_group_used(bg) || bg->zone_unusable < bg->length) { in btrfs_reset_unused_block_groups()
2713 spin_unlock(&bg->lock); in btrfs_reset_unused_block_groups()
2716 spin_unlock(&bg->lock); in btrfs_reset_unused_block_groups()
2721 spin_unlock(&fs_info->unused_bgs_lock); in btrfs_reset_unused_block_groups()
2725 list_del_init(&bg->bg_list); in btrfs_reset_unused_block_groups()
2727 spin_unlock(&fs_info->unused_bgs_lock); in btrfs_reset_unused_block_groups()
2732 * this block group read-only. in btrfs_reset_unused_block_groups()
2735 down_read(&fs_info->dev_replace.rwsem); in btrfs_reset_unused_block_groups()
2736 map = bg->physical_map; in btrfs_reset_unused_block_groups()
2737 for (int i = 0; i < map->num_stripes; i++) { in btrfs_reset_unused_block_groups()
2738 struct btrfs_io_stripe *stripe = &map->stripes[i]; in btrfs_reset_unused_block_groups()
2743 ret = blkdev_zone_mgmt(stripe->dev->bdev, REQ_OP_ZONE_RESET, in btrfs_reset_unused_block_groups()
2744 stripe->physical >> SECTOR_SHIFT, in btrfs_reset_unused_block_groups()
2749 up_read(&fs_info->dev_replace.rwsem); in btrfs_reset_unused_block_groups()
2753 up_read(&fs_info->dev_replace.rwsem); in btrfs_reset_unused_block_groups()
2755 spin_lock(&space_info->lock); in btrfs_reset_unused_block_groups()
2756 spin_lock(&bg->lock); in btrfs_reset_unused_block_groups()
2758 if (bg->ro) { in btrfs_reset_unused_block_groups()
2759 spin_unlock(&bg->lock); in btrfs_reset_unused_block_groups()
2760 spin_unlock(&space_info->lock); in btrfs_reset_unused_block_groups()
2764 reclaimed = bg->alloc_offset; in btrfs_reset_unused_block_groups()
2765 bg->zone_unusable = bg->length - bg->zone_capacity; in btrfs_reset_unused_block_groups()
2766 bg->alloc_offset = 0; in btrfs_reset_unused_block_groups()
2771 ASSERT(reclaimed == bg->zone_capacity); in btrfs_reset_unused_block_groups()
2772 bg->free_space_ctl->free_space += reclaimed; in btrfs_reset_unused_block_groups()
2773 space_info->bytes_zone_unusable -= reclaimed; in btrfs_reset_unused_block_groups()
2774 spin_unlock(&bg->lock); in btrfs_reset_unused_block_groups()
2776 spin_unlock(&space_info->lock); in btrfs_reset_unused_block_groups()
2780 num_bytes -= reclaimed; in btrfs_reset_unused_block_groups()
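Putting the accounting above together for one reset block group (made-up sizes; the block group is fully written and unused, so alloc_offset equals zone_capacity before the reset):

    #include <stdio.h>

    int main(void)
    {
        const unsigned long long MiB = 1 << 20;
        unsigned long long length = 256 * MiB, zone_capacity = 200 * MiB;
        unsigned long long alloc_offset = zone_capacity;       /* fully written, now unused */
        unsigned long long free_space = 0, bytes_zone_unusable = 256 * MiB;

        unsigned long long reclaimed = alloc_offset;            /* what the reset gives back */
        unsigned long long zone_unusable = length - zone_capacity;

        alloc_offset = 0;
        free_space += reclaimed;
        bytes_zone_unusable -= reclaimed;

        printf("reclaimed %llu MiB, zone_unusable %llu MiB, free %llu MiB\n",
               reclaimed / MiB, zone_unusable / MiB, free_space / MiB);  /* 200, 56, 200 */
        return 0;
    }
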