On 2020/7/2 23:54, Aravind Ramesh wrote:
> NVMe Zoned Namespace devices can have zone-capacity less than zone-size.
> Zone-capacity indicates the maximum number of sectors that are usable in
> a zone beginning from the first sector of the zone. This makes the sectors
> sectors after the zone-capacity till zone-size to be unusable.
> This patch set tracks zone-size and zone-capacity in zoned devices and
> calculate the usable blocks per segment and usable segments per section.
>
> If zone-capacity is less than zone-size mark only those segments which
> start before zone-capacity as free segments. All segments at and beyond
> zone-capacity are treated as permanently used segments. In cases where
> zone-capacity does not align with segment size the last segment will start
> before zone-capacity and end beyond the zone-capacity of the zone. For
> such spanning segments only sectors within the zone-capacity are used.
>
> Signed-off-by: Aravind Ramesh <[email protected]>
> Signed-off-by: Damien Le Moal <[email protected]>
> Signed-off-by: Niklas Cassel <[email protected]>
> ---
> fs/f2fs/f2fs.h | 5 ++
> fs/f2fs/segment.c | 136 ++++++++++++++++++++++++++++++++++++++++++++--
> fs/f2fs/segment.h | 6 +-
> fs/f2fs/super.c | 41 ++++++++++++--
> 4 files changed, 176 insertions(+), 12 deletions(-)
>
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index e6e47618a357..73219e4e1ba4 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1232,6 +1232,7 @@ struct f2fs_dev_info {
> #ifdef CONFIG_BLK_DEV_ZONED
> unsigned int nr_blkz; /* Total number of zones */
> unsigned long *blkz_seq; /* Bitmap indicating sequential zones */
> + block_t *zone_capacity_blocks; /* Array of zone capacity in blks */
> #endif
> };
>
> @@ -3395,6 +3396,10 @@ void f2fs_destroy_segment_manager_caches(void);
> int f2fs_rw_hint_to_seg_type(enum rw_hint hint);
> enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
> enum page_type type, enum temp_type temp);
> +unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
> + unsigned int segno);
> +unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
> + unsigned int segno);
>
> /*
> * checkpoint.c
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index c35614d255e1..d2156f3f56a5 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -4294,9 +4294,12 @@ static void init_free_segmap(struct f2fs_sb_info *sbi)
> {
> unsigned int start;
> int type;
> + struct seg_entry *sentry;
>
> for (start = 0; start < MAIN_SEGS(sbi); start++) {
> - struct seg_entry *sentry = get_seg_entry(sbi, start);
> + if (f2fs_usable_blks_in_seg(sbi, start) == 0)
If usable blocks count is zero, shouldn't we update
SIT_I(sbi)->written_valid_blocks
as we did when there is partial usable block in current segment?
> + continue;
> + sentry = get_seg_entry(sbi, start);
> if (!sentry->valid_blocks)
> __set_free(sbi, start);
> else
> @@ -4316,7 +4319,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
> struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
> struct free_segmap_info *free_i = FREE_I(sbi);
> unsigned int segno = 0, offset = 0, secno;
> - unsigned short valid_blocks;
> + unsigned short valid_blocks, usable_blks_in_seg;
> unsigned short blks_per_sec = BLKS_PER_SEC(sbi);
>
> while (1) {
> @@ -4326,9 +4329,10 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
> break;
> offset = segno + 1;
> valid_blocks = get_valid_blocks(sbi, segno, false);
> - if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
> + usable_blks_in_seg = f2fs_usable_blks_in_seg(sbi, segno);
> + if (valid_blocks == usable_blks_in_seg || !valid_blocks)
It needs to traverse .cur_valid_map bitmap to check whether blocks in range of
[0, usable_blks_in_seg] are all valid or not, if there is at least one usable
block in the range, segment should be dirty.
One question, if we select dirty segment which across zone-capacity as opened
segment (in curseg), how can we avoid allocating usable block beyong
zone-capacity
in such segment via .cur_valid_map?
> continue;
> - if (valid_blocks > sbi->blocks_per_seg) {
> + if (valid_blocks > usable_blks_in_seg) {
> f2fs_bug_on(sbi, 1);
> continue;
> }
> @@ -4678,6 +4682,101 @@ int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
>
> return 0;
> }
> +
> +static bool is_conv_zone(struct f2fs_sb_info *sbi, unsigned int zone_idx,
> + unsigned int dev_idx)
> +{
> + if (!bdev_is_zoned(FDEV(dev_idx).bdev))
> + return true;
> + return !test_bit(zone_idx, FDEV(dev_idx).blkz_seq);
> +}
> +
> +/* Return the zone index in the given device */
> +static unsigned int get_zone_idx(struct f2fs_sb_info *sbi, unsigned int
> secno,
> + int dev_idx)
> +{
> + block_t sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi,
> secno));
> +
> + return (sec_start_blkaddr - FDEV(dev_idx).start_blk) >>
> + sbi->log_blocks_per_blkz;
> +}
> +
> +/*
> + * Return the usable segments in a section based on the zone's
> + * corresponding zone capacity. Zone is equal to a section.
> + */
> +static inline unsigned int f2fs_usable_zone_segs_in_sec(
> + struct f2fs_sb_info *sbi, unsigned int segno)
> +{
> + unsigned int dev_idx, zone_idx, unusable_segs_in_sec;
> +
> + dev_idx = f2fs_target_device_index(sbi, START_BLOCK(sbi, segno));
> + zone_idx = get_zone_idx(sbi, GET_SEC_FROM_SEG(sbi, segno), dev_idx);
> +
> + /* Conventional zone's capacity is always equal to zone size */
> + if (is_conv_zone(sbi, zone_idx, dev_idx))
> + return sbi->segs_per_sec;
> +
> + /*
> + * If the zone_capacity_blocks array is NULL, then zone capacity
> + * is equal to the zone size for all zones
> + */
> + if (!FDEV(dev_idx).zone_capacity_blocks)
> + return sbi->segs_per_sec;
> +
> + /* Get the segment count beyond zone capacity block */
> + unusable_segs_in_sec = (sbi->blocks_per_blkz -
> + FDEV(dev_idx).zone_capacity_blocks[zone_idx]) >>
> + sbi->log_blocks_per_seg;
> + return sbi->segs_per_sec - unusable_segs_in_sec;
> +}
> +
> +/*
> + * Return the number of usable blocks in a segment. The number of blocks
> + * returned is always equal to the number of blocks in a segment for
> + * segments fully contained within a sequential zone capacity or a
> + * conventional zone. For segments partially contained in a sequential
> + * zone capacity, the number of usable blocks up to the zone capacity
> + * is returned. 0 is returned in all other cases.
> + */
> +static inline unsigned int f2fs_usable_zone_blks_in_seg(
> + struct f2fs_sb_info *sbi, unsigned int segno)
> +{
> + block_t seg_start, sec_start_blkaddr, sec_cap_blkaddr;
> + unsigned int zone_idx, dev_idx, secno;
> +
> + secno = GET_SEC_FROM_SEG(sbi, segno);
> + seg_start = START_BLOCK(sbi, segno);
> + dev_idx = f2fs_target_device_index(sbi, seg_start);
> + zone_idx = get_zone_idx(sbi, secno, dev_idx);
> +
> + /*
> + * Conventional zone's capacity is always equal to zone size,
> + * so, blocks per segment is unchanged.
> + */
> + if (is_conv_zone(sbi, zone_idx, dev_idx))
> + return sbi->blocks_per_seg;
> +
> + if (!FDEV(dev_idx).zone_capacity_blocks)
> + return sbi->blocks_per_seg;
> +
> + sec_start_blkaddr = START_BLOCK(sbi, GET_SEG_FROM_SEC(sbi, secno));
> + sec_cap_blkaddr = sec_start_blkaddr +
> + FDEV(dev_idx).zone_capacity_blocks[zone_idx];
> +
> + /*
> + * If segment starts before zone capacity and spans beyond
> + * zone capacity, then usable blocks are from seg start to
> + * zone capacity. If the segment starts after the zone capacity,
> + * then there are no usable blocks.
> + */
> + if (seg_start >= sec_cap_blkaddr)
> + return 0;
> + if (seg_start + sbi->blocks_per_seg > sec_cap_blkaddr)
> + return sec_cap_blkaddr - seg_start;
> +
> + return sbi->blocks_per_seg;
> +}
> #else
> int f2fs_fix_curseg_write_pointer(struct f2fs_sb_info *sbi)
> {
> @@ -4688,7 +4787,36 @@ int f2fs_check_write_pointer(struct f2fs_sb_info *sbi)
> {
> return 0;
> }
> +
> +static inline unsigned int f2fs_usable_zone_blks_in_seg(struct f2fs_sb_info
> *sbi,
> + unsigned int segno)
> +{
> + return 0;
> +}
> +
> +static inline unsigned int f2fs_usable_zone_segs_in_sec(struct f2fs_sb_info
> *sbi,
> + unsigned int segno)
> +{
> + return 0;
> +}
> #endif
> +unsigned int f2fs_usable_blks_in_seg(struct f2fs_sb_info *sbi,
> + unsigned int segno)
> +{
> + if (f2fs_sb_has_blkzoned(sbi))
> + return f2fs_usable_zone_blks_in_seg(sbi, segno);
> +
> + return sbi->blocks_per_seg;
> +}
> +
> +unsigned int f2fs_usable_segs_in_sec(struct f2fs_sb_info *sbi,
> + unsigned int segno)
> +{
> + if (f2fs_sb_has_blkzoned(sbi))
> + return f2fs_usable_zone_segs_in_sec(sbi, segno);
> +
> + return sbi->segs_per_sec;
> +}
>
> /*
> * Update min, max modified time for cost-benefit GC algorithm
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index f261e3e6a69b..79b0dc33feaf 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -411,6 +411,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi,
> unsigned int segno)
> unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
> unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
> unsigned int next;
> + unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno);
>
> spin_lock(&free_i->segmap_lock);
> clear_bit(segno, free_i->free_segmap);
> @@ -418,7 +419,7 @@ static inline void __set_free(struct f2fs_sb_info *sbi,
> unsigned int segno)
>
> next = find_next_bit(free_i->free_segmap,
> start_segno + sbi->segs_per_sec, start_segno);
> - if (next >= start_segno + sbi->segs_per_sec) {
> + if (next >= start_segno + usable_segs) {
> clear_bit(secno, free_i->free_secmap);
> free_i->free_sections++;
> }
> @@ -444,6 +445,7 @@ static inline void __set_test_and_free(struct
> f2fs_sb_info *sbi,
> unsigned int secno = GET_SEC_FROM_SEG(sbi, segno);
> unsigned int start_segno = GET_SEG_FROM_SEC(sbi, secno);
> unsigned int next;
> + unsigned int usable_segs = f2fs_usable_segs_in_sec(sbi, segno);
>
> spin_lock(&free_i->segmap_lock);
> if (test_and_clear_bit(segno, free_i->free_segmap)) {
> @@ -453,7 +455,7 @@ static inline void __set_test_and_free(struct
> f2fs_sb_info *sbi,
> goto skip_free;
> next = find_next_bit(free_i->free_segmap,
> start_segno + sbi->segs_per_sec, start_segno);
> - if (next >= start_segno + sbi->segs_per_sec) {
> + if (next >= start_segno + usable_segs) {
> if (test_and_clear_bit(secno, free_i->free_secmap))
> free_i->free_sections++;
> }
> diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
> index 80cb7cd358f8..2686b07ae7eb 100644
> --- a/fs/f2fs/super.c
> +++ b/fs/f2fs/super.c
> @@ -1164,6 +1164,7 @@ static void destroy_device_list(struct f2fs_sb_info
> *sbi)
> blkdev_put(FDEV(i).bdev, FMODE_EXCL);
> #ifdef CONFIG_BLK_DEV_ZONED
> kvfree(FDEV(i).blkz_seq);
> + kvfree(FDEV(i).zone_capacity_blocks);
Now, f2fs_kzalloc won't allocate vmalloc's memory, so it's safe to use kfree().
> #endif
> }
> kvfree(sbi->devs);
> @@ -3039,13 +3040,26 @@ static int init_percpu_info(struct f2fs_sb_info *sbi)
> }
>
> #ifdef CONFIG_BLK_DEV_ZONED
> +
> +struct f2fs_report_zones_args {
> + struct f2fs_dev_info *dev;
> + bool zone_cap_mismatch;
> +};
> +
> static int f2fs_report_zone_cb(struct blk_zone *zone, unsigned int idx,
> - void *data)
> + void *data)
> {
> - struct f2fs_dev_info *dev = data;
> + struct f2fs_report_zones_args *rz_args = data;
> +
> + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
> + return 0;
> +
> + set_bit(idx, rz_args->dev->blkz_seq);
> + rz_args->dev->zone_capacity_blocks[idx] = zone->capacity >>
> + F2FS_LOG_SECTORS_PER_BLOCK;
> + if (zone->len != zone->capacity && !rz_args->zone_cap_mismatch)
> + rz_args->zone_cap_mismatch = true;
>
> - if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL)
> - set_bit(idx, dev->blkz_seq);
> return 0;
> }
>
> @@ -3053,6 +3067,7 @@ static int init_blkz_info(struct f2fs_sb_info *sbi, int
> devi)
> {
> struct block_device *bdev = FDEV(devi).bdev;
> sector_t nr_sectors = bdev->bd_part->nr_sects;
> + struct f2fs_report_zones_args rep_zone_arg;
> int ret;
>
> if (!f2fs_sb_has_blkzoned(sbi))
> @@ -3078,12 +3093,26 @@ static int init_blkz_info(struct f2fs_sb_info *sbi,
> int devi)
> if (!FDEV(devi).blkz_seq)
> return -ENOMEM;
>
> - /* Get block zones type */
> + /* Get block zones type and zone-capacity */
> + FDEV(devi).zone_capacity_blocks = f2fs_kzalloc(sbi,
> + FDEV(devi).nr_blkz * sizeof(block_t),
> + GFP_KERNEL);
> + if (!FDEV(devi).zone_capacity_blocks)
> + return -ENOMEM;
> +
> + rep_zone_arg.dev = &FDEV(devi);
> + rep_zone_arg.zone_cap_mismatch = false;
> +
> ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES, f2fs_report_zone_cb,
> - &FDEV(devi));
> + &rep_zone_arg);
> if (ret < 0)
> return ret;
Missed to call kfree(FDEV(devi).zone_capacity_blocks)?
>
> + if (!rep_zone_arg.zone_cap_mismatch) {
> + kvfree(FDEV(devi).zone_capacity_blocks);
Ditto, kfree().
Thanks,
> + FDEV(devi).zone_capacity_blocks = NULL;
> + }
> +
> return 0;
> }
> #endif
>
_______________________________________________
Linux-f2fs-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel