> -----Original Message-----
> From: Chao Yu <yuch...@huawei.com>
> Sent: Friday, July 17, 2020 12:58 PM
> To: Aravind Ramesh <aravind.ram...@wdc.com>; jaeg...@kernel.org; linux-f2fs-
> de...@lists.sourceforge.net
> Cc: Shinichiro Kawasaki <shinichiro.kawas...@wdc.com>; Matias Bjorling
> <matias.bjorl...@wdc.com>
> Subject: Re: [PATCH 1/2] mkfs.f2fs: zns zone-capacity support.
> 
> On 2020/7/2 23:54, Aravind Ramesh wrote:
> > NVM Express Zoned Namespace (ZNS) devices can have zone-capacity(zc)
> > less than the zone-size. ZNS defines a per zone capacity which can be
> > equal or less than the zone-size. Zone-capacity is the number of
> > usable blocks in the zone. In such cases, the filesystem should not
> > write/read beyond the zone-capacity. Update the super block with the
> > usable number of blocks and free segment count in the ZNS device
> > zones, if zone-capacity is less than zone-size. Set reserved segment
> > count and overprovision ratio based on the usable segments in the zone.
> >
> > Signed-off-by: Aravind Ramesh <aravind.ram...@wdc.com>
> > Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawas...@wdc.com>
> > ---
> >  configure.ac        |  4 ++++
> >  include/f2fs_fs.h   | 40 +++++++++++++++++++++++++++++++----
> >  lib/libf2fs_io.c    |  1 +
> >  lib/libf2fs_zoned.c | 51
> > +++++++++++++++++++++++++++++++++++++++++++--
> >  mkfs/f2fs_format.c  | 26 ++++++++++++++++++-----
> >  5 files changed, 111 insertions(+), 11 deletions(-)
> >
> > diff --git a/configure.ac b/configure.ac index 9ac0c24..e9acd1a 100644
> > --- a/configure.ac
> > +++ b/configure.ac
> > @@ -213,6 +213,10 @@ AC_CONFIG_FILES([
> >     tools/f2fs_io/Makefile
> >  ])
> >
> > +AC_CHECK_MEMBER([struct blk_zone.capacity],
> > +           [AC_DEFINE(HAVE_BLK_ZONE_REP_V2, [1], [report zones includes
> zone capacity])],
> > +           [], [[#include <linux/blkzoned.h>]])
> > +
> >  # export library version info for mkfs/libf2fs_format_la
> > AC_SUBST(FMT_CURRENT, 6)  AC_SUBST(FMT_REVISION, 0) diff --git
> > a/include/f2fs_fs.h b/include/f2fs_fs.h index 709bfd8..40165ed 100644
> > --- a/include/f2fs_fs.h
> > +++ b/include/f2fs_fs.h
> > @@ -332,6 +332,7 @@ struct device_info {
> >     u_int32_t nr_zones;
> >     u_int32_t nr_rnd_zones;
> >     size_t zone_blocks;
> > +   size_t *zone_cap_blocks;
> >  };
> >
> >  typedef struct {
> > @@ -1324,13 +1325,42 @@ blk_zone_cond_str(struct blk_zone *blkz)
> >     return "Unknown-cond";
> >  }
> >
> > -#define blk_zone_empty(z)  (blk_zone_cond(z) == BLK_ZONE_COND_EMPTY)
> > +/*
> > + * Handle kernel zone capacity support  */ #ifndef
> > +HAVE_BLK_ZONE_REP_V2
> > +#define BLK_ZONE_REP_CAPACITY   (1 << 0)
> > +struct blk_zone_v2 {
> > +   __u64   start;          /* Zone start sector */
> > +   __u64   len;            /* Zone length in number of sectors */
> > +   __u64   wp;             /* Zone write pointer position */
> > +   __u8    type;           /* Zone type */
> > +   __u8    cond;           /* Zone condition */
> > +   __u8    non_seq;        /* Non-sequential write resources active */
> > +   __u8    reset;          /* Reset write pointer recommended */
> > +   __u8    resv[4];
> > +   __u64   capacity;       /* Zone capacity in number of sectors */
> > +   __u8    reserved[24];
> > +};
> > +#define blk_zone blk_zone_v2
> >
> > +struct blk_zone_report_v2 {
> > +   __u64   sector;
> > +   __u32   nr_zones;
> > +   __u32   flags;
> > +struct blk_zone zones[0];
> > +};

[snip...]

> > @@ -1352,6 +1383,7 @@ static inline double
> > get_best_overprovision(struct f2fs_super_block *sb)  {
> >     double reserved, ovp, candidate, end, diff, space;
> >     double max_ovp = 0, max_space = 0;
> > +   u_int32_t usable_main_segs = f2fs_get_usable_segments(sb);
> >
> >     if (get_sb(segment_count_main) < 256) {
> >             candidate = 10;
> > @@ -1365,9 +1397,9 @@ static inline double
> > get_best_overprovision(struct f2fs_super_block *sb)
> >
> >     for (; candidate <= end; candidate += diff) {
> >             reserved = (2 * (100 / candidate + 1) + 6) *
> > -                                           get_sb(segs_per_sec);
> > -           ovp = (get_sb(segment_count_main) - reserved) * candidate / 100;
> > -           space = get_sb(segment_count_main) - reserved - ovp;
> > +                           (usable_main_segs / get_sb(section_count));
> 
> It looks like segs_per_sec becomes an average value over the whole ZNS
> device. I'm not sure whether computing it with a round-down division is safe
> enough for reserving space, as it may cut off several segments that may be
> needed during foreground GC, so I suggest using DIV_ROUND_UP() here to avoid
> boundary issues.

Yes, it tries to reflect the usable segs_per_sec rather than using a value
based on the zone-size, which could reserve more than the intended number of
segments.

I do see a round_down(x, y) in the f2fs-tools code, but there is no round_up().

Does this look ok?
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
(from kernel.h)

> 
> > +           ovp = (usable_main_segs - reserved) * candidate / 100;
> > +           space = usable_main_segs - reserved - ovp;
> >             if (max_space < space) {
> >                     max_space = space;
> >                     max_ovp = candidate;
> > diff --git a/lib/libf2fs_io.c b/lib/libf2fs_io.c index
> > 1f597a9..138285d 100644
> > --- a/lib/libf2fs_io.c
> > +++ b/lib/libf2fs_io.c
> > @@ -784,6 +784,7 @@ int f2fs_finalize_device(void)
> >                     break;
> >             }
> >             free(c.devices[i].path);
> > +           free(c.devices[i].zone_cap_blocks);
> >     }
> >     close(c.kd);
> >
> > diff --git a/lib/libf2fs_zoned.c b/lib/libf2fs_zoned.c index
> > efc687c..f98fcdb 100644
> > --- a/lib/libf2fs_zoned.c
> > +++ b/lib/libf2fs_zoned.c
> > @@ -291,6 +291,13 @@ int f2fs_check_zones(int j)
> >             return -ENOMEM;
> >     }
> >
> > +   dev->zone_cap_blocks = malloc(dev->nr_zones * sizeof(size_t));
> > +   if (!dev->zone_cap_blocks) {
> > +           ERR_MSG("No memory for zone capacity list.\n");
> > +           return -ENOMEM;
> > +   }
> > +   memset(dev->zone_cap_blocks, 0, (dev->nr_zones * sizeof(size_t)));
> > +
> >     dev->nr_rnd_zones = 0;
> >     sector = 0;
> >     total_sectors = (dev->total_sectors * c.sector_size) >> 9; @@
> > -335,10 +342,15 @@ int f2fs_check_zones(int j)
> >                                 blk_zone_cond_str(blkz),
> >                                 blk_zone_sector(blkz),
> >                                 blk_zone_length(blkz));
> > +                           dev->zone_cap_blocks[n] =
> > +                                   blk_zone_length(blkz) >>
> > +                                   (F2FS_BLKSIZE_BITS - SECTOR_SHIFT);
> >                     } else {
> >                             DBG(2,
> > -                               "Zone %05u: type 0x%x (%s), cond 0x%x (%s),
> need_reset %d, "
> > -                               "non_seq %d, sector %llu, %llu sectors, wp 
> > sector
> %llu\n",
> > +                               "Zone %05u: type 0x%x (%s), cond 0x%x (%s),"
> > +                               " need_reset %d, non_seq %d, sector %llu,"
> > +                               " %llu sectors, capacity %llu,"
> > +                               " wp sector %llu\n",
> >                                 n,
> >                                 blk_zone_type(blkz),
> >                                 blk_zone_type_str(blkz),
> > @@ -348,7 +360,11 @@ int f2fs_check_zones(int j)
> >                                 blk_zone_non_seq(blkz),
> >                                 blk_zone_sector(blkz),
> >                                 blk_zone_length(blkz),
> > +                               blk_zone_capacity(blkz, rep->flags),
> >                                 blk_zone_wp_sector(blkz));
> > +                           dev->zone_cap_blocks[n] =
> > +                                   blk_zone_capacity(blkz, rep->flags) >>
> > +                                   (F2FS_BLKSIZE_BITS - SECTOR_SHIFT);
> >                     }
> >
> >                     sector = blk_zone_sector(blkz) + blk_zone_length(blkz);
> @@ -473,6
> > +489,33 @@ out:
> >     return ret;
> >  }
> >
> > +uint32_t f2fs_get_usable_segments(struct f2fs_super_block *sb) {
> > +#ifdef HAVE_BLK_ZONE_REP_V2
> > +   int i, j;
> > +   uint32_t usable_segs = 0, zone_segs;
> > +   for (i = 0; i < c.ndevs; i++) {
> > +           if (c.devices[i].zoned_model != F2FS_ZONED_HM) {
> > +                   usable_segs += c.devices[i].total_segments;
> > +                   continue;
> > +           }
> > +           for (j = 0; j < c.devices[i].nr_zones; j++) {
> > +                   zone_segs = c.devices[i].zone_cap_blocks[j] >>
> > +                                   get_sb(log_blocks_per_seg);
> > +                   if (c.devices[i].zone_cap_blocks[j] %
> > +
>       DEFAULT_BLOCKS_PER_SEGMENT)
> > +                           usable_segs += zone_segs + 1;
> > +                   else
> > +                           usable_segs += zone_segs;
> > +           }
> > +   }
> > +   usable_segs -= (get_sb(main_blkaddr) - get_sb(segment0_blkaddr)) >>
> > +                                           get_sb(log_blocks_per_seg);
> > +   return usable_segs;
> > +#endif
> > +   return get_sb(segment_count_main);
> > +}
> > +
> >  #else
> >
> >  int f2fs_report_zone(int i, u_int64_t UNUSED(sector), void
> > *UNUSED(blkzone)) @@ -527,5 +570,9 @@ int f2fs_reset_zones(int i)
> >     return -1;
> >  }
> >
> > +uint32_t f2fs_get_usable_segments(struct f2fs_super_block *sb) {
> > +   return get_sb(segment_count_main);
> > +}
> >  #endif
> >
> > diff --git a/mkfs/f2fs_format.c b/mkfs/f2fs_format.c index
> > 4999cac..74a81c8 100644
> > --- a/mkfs/f2fs_format.c
> > +++ b/mkfs/f2fs_format.c
> > @@ -425,13 +425,19 @@ static int f2fs_prepare_super_block(void)
> >
> >     set_sb(segment_count_main, get_sb(section_count) * c.segs_per_sec);
> >
> > -   /* Let's determine the best reserved and overprovisioned space */
> > +   /*
> > +    * Let's determine the best reserved and overprovisioned space.
> > +    * For Zoned device, if zone capacity less than zone size, the segments
> > +    * starting after the zone capacity are unusable in each zone. So get
> > +    * overprovision ratio and reserved seg count based on avg usable
> > +    * segs_per_sec.
> > +    */
> >     if (c.overprovision == 0)
> >             c.overprovision = get_best_overprovision(sb);
> >
> >     c.reserved_segments =
> > -                   (2 * (100 / c.overprovision + 1) + NR_CURSEG_TYPE)
> > -                   * c.segs_per_sec;
> > +                   (2 * (100 / c.overprovision + 1) + NR_CURSEG_TYPE) *
> > +                   (f2fs_get_usable_segments(sb) / get_sb(section_count));
> 
> Ditto, DIV_ROUND_UP()

Ok

> 
> >
> >     if (c.overprovision == 0 || c.total_segments < F2FS_MIN_SEGMENTS ||
> >             (c.devices[0].total_sectors *
> > @@ -672,19 +678,29 @@ static int f2fs_write_check_point_pack(void)
> >     set_cp(valid_block_count, 2 + c.quota_inum + c.quota_dnum +
> >                     c.lpf_inum + c.lpf_dnum);
> >     set_cp(rsvd_segment_count, c.reserved_segments);
> > -   set_cp(overprov_segment_count, (get_sb(segment_count_main) -
> > +
> > +   /*
> > +    * For zoned devices, if zone capacity less than zone size, get
> > +    * overprovision segment count based on usable segments in the device.
> > +    */
> > +   set_cp(overprov_segment_count, (f2fs_get_usable_segments(sb) -
> >                     get_cp(rsvd_segment_count)) *
> >                     c.overprovision / 100);
> >     set_cp(overprov_segment_count, get_cp(overprov_segment_count) +
> >                     get_cp(rsvd_segment_count));
> >
> > +   if (f2fs_get_usable_segments(sb) < (get_cp(rsvd_segment_count) +
> 
> equal is not allowed as well?

You are right, I will make it "<=".

Thanks for the feedback,
Aravind
> 
> > +                                   get_cp(overprov_segment_count))) {
> > +           MSG(0, "\tError: Not enough segments to create F2FS Volume\n");
> > +           goto free_nat_bits;
> > +   }
> >     MSG(0, "Info: Overprovision ratio = %.3lf%%\n", c.overprovision);
> >     MSG(0, "Info: Overprovision segments = %u (GC reserved = %u)\n",
> >                                     get_cp(overprov_segment_count),
> >                                     c.reserved_segments);
> >
> >     /* main segments - reserved segments - (node + data segments) */
> > -   set_cp(free_segment_count, get_sb(segment_count_main) - 6);
> > +   set_cp(free_segment_count, f2fs_get_usable_segments(sb) - 6);
> >     set_cp(user_block_count, ((get_cp(free_segment_count) + 6 -
> >                     get_cp(overprov_segment_count)) * c.blks_per_seg));
> >     /* cp page (2), data summaries (1), node summaries (3) */
> >


_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Reply via email to