On 2021/03/15 14:55, Naohiro Aota wrote: > This commit moves the location of superblock logging zones. The location of > the logging zones is determined based on fixed block addresses instead of > on fixed zone numbers.
David, any comment on this? It would be nice to get this settled in this cycle so that we have a stable on-disk format going forward. btrfs-tools and libblkid zoned support patches also depend on this. > > By locating the superblock zones using fixed addresses, we can scan a > dumped file system image without the zone information. And, no drawbacks > exist. > > We use the following three pairs of zones containing fixed offset > locations, regardless of the device zone size. > > - Primary superblock: zone starting at offset 0 and the following zone > - First copy: zone containing offset 64GB and the following zone > - Second copy: zone containing offset 256GB and the following zone > > If the location of the zones is outside of the disk, we don't record the > superblock copy. > > These addresses are arbitrary, but using addresses that are too large > reduces superblock reliability for smaller devices, so we do not want to > exceed 1T to cover all cases nicely. > > Also, LBAs are generally distributed initially across one head (platter > side) up to one or more zones, then go on the next head backward (the other > side of the same platter), and on to the following head/platter. Thus using > non-sequential fixed addresses for superblock logging, such as 0/64G/256G, > likely results in each superblock copy being on a different head/platter > which improves chances of recovery in case of superblock read error. > > These zones are reserved for superblock logging and never used for data or > metadata blocks. Zones containing the offsets used to store superblocks in > a regular btrfs volume (non-zoned case) are also reserved to avoid > confusion. > > Note that we only reserve the 2 zones per primary/copy actually used for > superblock logging. We don't reserve the ranges possibly containing > superblock with the largest supported zone size (0-16GB, 64G-80GB, > 256G-272GB). > > The first copy position is much larger than for a regular btrfs volume > (64M). 
This increase is to avoid overlapping with the log zones for the > primary superblock. This higher location is arbitrary but allows supporting > devices with very large zone size, up to 32GB. But we only allow zone sizes > up to 8GB for now. > > Signed-off-by: Naohiro Aota <naohiro.a...@wdc.com> > --- > fs/btrfs/zoned.c | 39 +++++++++++++++++++++++++++++++-------- > 1 file changed, 31 insertions(+), 8 deletions(-) > > diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c > index 43948bd40e02..6a72ca1f7988 100644 > --- a/fs/btrfs/zoned.c > +++ b/fs/btrfs/zoned.c > @@ -21,9 +21,24 @@ > /* Pseudo write pointer value for conventional zone */ > #define WP_CONVENTIONAL ((u64)-2) > > +/* > + * Location of the first zone of superblock logging zone pairs. > + * - Primary superblock: the zone containing offset 0 (zone 0) > + * - First superblock copy: the zone containing offset 64G > + * - Second superblock copy: the zone containing offset 256G > + */ > +#define BTRFS_PRIMARY_SB_LOG_ZONE 0ULL > +#define BTRFS_FIRST_SB_LOG_ZONE (64ULL * SZ_1G) > +#define BTRFS_SECOND_SB_LOG_ZONE (256ULL * SZ_1G) > +#define BTRFS_FIRST_SB_LOG_ZONE_SHIFT const_ilog2(BTRFS_FIRST_SB_LOG_ZONE) > +#define BTRFS_SECOND_SB_LOG_ZONE_SHIFT const_ilog2(BTRFS_SECOND_SB_LOG_ZONE) > + > /* Number of superblock log zones */ > #define BTRFS_NR_SB_LOG_ZONES 2 > > +/* Max size of supported zone size */ > +#define BTRFS_MAX_ZONE_SIZE SZ_8G > + > static int copy_zone_info_cb(struct blk_zone *zone, unsigned int idx, void > *data) > { > struct blk_zone *zones = data; > @@ -111,11 +126,8 @@ static int sb_write_pointer(struct block_device *bdev, > struct blk_zone *zones, > } > > /* > - * The following zones are reserved as the circular buffer on ZONED btrfs. 
> - * - The primary superblock: zones 0 and 1 > - * - The first copy: zones 16 and 17 > - * - The second copy: zones 1024 or zone at 256GB which is minimum, and > - * the following one > + * Get the zone number of the first zone of a pair of contiguous zones used > + * for superblock logging. > */ > static inline u32 sb_zone_number(int shift, int mirror) > { > @@ -123,8 +135,8 @@ static inline u32 sb_zone_number(int shift, int mirror) > > switch (mirror) { > case 0: return 0; > - case 1: return 16; > - case 2: return min_t(u64, btrfs_sb_offset(mirror) >> shift, 1024); > + case 1: return 1 << (BTRFS_FIRST_SB_LOG_ZONE_SHIFT - shift); > + case 2: return 1 << (BTRFS_SECOND_SB_LOG_ZONE_SHIFT - shift); > } > > return 0; > @@ -300,10 +312,21 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) > zone_sectors = bdev_zone_sectors(bdev); > } > > - nr_sectors = bdev_nr_sectors(bdev); > /* Check if it's power of 2 (see is_power_of_2) */ > ASSERT(zone_sectors != 0 && (zone_sectors & (zone_sectors - 1)) == 0); > zone_info->zone_size = zone_sectors << SECTOR_SHIFT; > + > + /* We reject devices with a zone size larger than 8GB. */ > + if (zone_info->zone_size > BTRFS_MAX_ZONE_SIZE) { > + btrfs_err_in_rcu(fs_info, > + "zoned: %s: zone size %llu is too large", > + rcu_str_deref(device->name), > + zone_info->zone_size); > + ret = -EINVAL; > + goto out; > + } > + > + nr_sectors = bdev_nr_sectors(bdev); > zone_info->zone_size_shift = ilog2(zone_info->zone_size); > zone_info->max_zone_append_size = > (u64)queue_max_zone_append_sectors(queue) << SECTOR_SHIFT; > -- Damien Le Moal Western Digital Research