NVM Express Zoned Namespace (ZNS) devices can have a zone capacity (zc)
that is less than the zone size. ZNS defines a per-zone capacity, which can
be equal to or less than the zone size. Zone capacity is the number of
usable blocks in the zone. When the zone capacity is less than the zone
size, the filesystem should not write/read beyond the zone capacity. In
that case, update the super block with the usable number of blocks and the
free segment count in the ZNS device zones. Set the reserved segment count
and overprovision ratio based on the usable segments in the zone.

Signed-off-by: Aravind Ramesh <aravind.ram...@wdc.com>
Signed-off-by: Shin'ichiro Kawasaki <shinichiro.kawas...@wdc.com>
---
 configure.ac        |  4 ++++
 include/f2fs_fs.h   | 40 +++++++++++++++++++++++++++++++----
 lib/libf2fs_io.c    |  1 +
 lib/libf2fs_zoned.c | 51 +++++++++++++++++++++++++++++++++++++++++++--
 mkfs/f2fs_format.c  | 26 ++++++++++++++++++-----
 5 files changed, 111 insertions(+), 11 deletions(-)

diff --git a/configure.ac b/configure.ac
index 9ac0c24..e9acd1a 100644
--- a/configure.ac
+++ b/configure.ac
@@ -213,6 +213,10 @@ AC_CONFIG_FILES([
        tools/f2fs_io/Makefile
 ])
 
+AC_CHECK_MEMBER([struct blk_zone.capacity],
+               [AC_DEFINE(HAVE_BLK_ZONE_REP_V2, [1], [report zones includes zone capacity])],
+               [], [[#include <linux/blkzoned.h>]])
+
 # export library version info for mkfs/libf2fs_format_la
 AC_SUBST(FMT_CURRENT, 6)
 AC_SUBST(FMT_REVISION, 0)
diff --git a/include/f2fs_fs.h b/include/f2fs_fs.h
index 709bfd8..40165ed 100644
--- a/include/f2fs_fs.h
+++ b/include/f2fs_fs.h
@@ -332,6 +332,7 @@ struct device_info {
        u_int32_t nr_zones;
        u_int32_t nr_rnd_zones;
        size_t zone_blocks;
+       size_t *zone_cap_blocks;
 };
 
 typedef struct {
@@ -1324,13 +1325,42 @@ blk_zone_cond_str(struct blk_zone *blkz)
        return "Unknown-cond";
 }
 
-#define blk_zone_empty(z)      (blk_zone_cond(z) == BLK_ZONE_COND_EMPTY)
+/*
+ * Handle kernel zone capacity support
+ */
+#ifndef HAVE_BLK_ZONE_REP_V2
+#define BLK_ZONE_REP_CAPACITY   (1 << 0)
+struct blk_zone_v2 {
+       __u64   start;          /* Zone start sector */
+       __u64   len;            /* Zone length in number of sectors */
+       __u64   wp;             /* Zone write pointer position */
+       __u8    type;           /* Zone type */
+       __u8    cond;           /* Zone condition */
+       __u8    non_seq;        /* Non-sequential write resources active */
+       __u8    reset;          /* Reset write pointer recommended */
+       __u8    resv[4];
+       __u64   capacity;       /* Zone capacity in number of sectors */
+       __u8    reserved[24];
+};
+#define blk_zone blk_zone_v2
 
+struct blk_zone_report_v2 {
+       __u64   sector;
+       __u32   nr_zones;
+       __u32   flags;
+struct blk_zone zones[0];
+};
+#define blk_zone_report blk_zone_report_v2
+#endif /* HAVE_BLK_ZONE_REP_V2 */
+
+#define blk_zone_empty(z)      (blk_zone_cond(z) == BLK_ZONE_COND_EMPTY)
 #define blk_zone_sector(z)     (z)->start
 #define blk_zone_length(z)     (z)->len
 #define blk_zone_wp_sector(z)  (z)->wp
 #define blk_zone_need_reset(z) (int)(z)->reset
 #define blk_zone_non_seq(z)    (int)(z)->non_seq
+#define blk_zone_capacity(z, f) ((f & BLK_ZONE_REP_CAPACITY) ? \
+                                       (z)->capacity : (z)->len)
 
 #endif
 
@@ -1342,6 +1372,7 @@ extern int f2fs_report_zones(int, report_zones_cb_t *, void *);
 extern int f2fs_check_zones(int);
 int f2fs_reset_zone(int, void *);
 extern int f2fs_reset_zones(int);
+extern uint32_t f2fs_get_usable_segments(struct f2fs_super_block *sb);
 
 #define SIZE_ALIGN(val, size)  ((val) + (size) - 1) / (size)
 #define SEG_ALIGN(blks)                SIZE_ALIGN(blks, c.blks_per_seg)
@@ -1352,6 +1383,7 @@ static inline double get_best_overprovision(struct f2fs_super_block *sb)
 {
        double reserved, ovp, candidate, end, diff, space;
        double max_ovp = 0, max_space = 0;
+       u_int32_t usable_main_segs = f2fs_get_usable_segments(sb);
 
        if (get_sb(segment_count_main) < 256) {
                candidate = 10;
@@ -1365,9 +1397,9 @@ static inline double get_best_overprovision(struct f2fs_super_block *sb)
 
        for (; candidate <= end; candidate += diff) {
                reserved = (2 * (100 / candidate + 1) + 6) *
-                                               get_sb(segs_per_sec);
-               ovp = (get_sb(segment_count_main) - reserved) * candidate / 100;
-               space = get_sb(segment_count_main) - reserved - ovp;
+                               (usable_main_segs / get_sb(section_count));
+               ovp = (usable_main_segs - reserved) * candidate / 100;
+               space = usable_main_segs - reserved - ovp;
                if (max_space < space) {
                        max_space = space;
                        max_ovp = candidate;
diff --git a/lib/libf2fs_io.c b/lib/libf2fs_io.c
index 1f597a9..138285d 100644
--- a/lib/libf2fs_io.c
+++ b/lib/libf2fs_io.c
@@ -784,6 +784,7 @@ int f2fs_finalize_device(void)
                        break;
                }
                free(c.devices[i].path);
+               free(c.devices[i].zone_cap_blocks);
        }
        close(c.kd);
 
diff --git a/lib/libf2fs_zoned.c b/lib/libf2fs_zoned.c
index efc687c..f98fcdb 100644
--- a/lib/libf2fs_zoned.c
+++ b/lib/libf2fs_zoned.c
@@ -291,6 +291,13 @@ int f2fs_check_zones(int j)
                return -ENOMEM;
        }
 
+       dev->zone_cap_blocks = malloc(dev->nr_zones * sizeof(size_t));
+       if (!dev->zone_cap_blocks) {
+               ERR_MSG("No memory for zone capacity list.\n");
+               return -ENOMEM;
+       }
+       memset(dev->zone_cap_blocks, 0, (dev->nr_zones * sizeof(size_t)));
+
        dev->nr_rnd_zones = 0;
        sector = 0;
        total_sectors = (dev->total_sectors * c.sector_size) >> 9;
@@ -335,10 +342,15 @@ int f2fs_check_zones(int j)
                                    blk_zone_cond_str(blkz),
                                    blk_zone_sector(blkz),
                                    blk_zone_length(blkz));
+                               dev->zone_cap_blocks[n] =
+                                       blk_zone_length(blkz) >>
+                                       (F2FS_BLKSIZE_BITS - SECTOR_SHIFT);
                        } else {
                                DBG(2,
-                                   "Zone %05u: type 0x%x (%s), cond 0x%x (%s), need_reset %d, "
-                                   "non_seq %d, sector %llu, %llu sectors, wp sector %llu\n",
+                                   "Zone %05u: type 0x%x (%s), cond 0x%x (%s),"
+                                   " need_reset %d, non_seq %d, sector %llu,"
+                                   " %llu sectors, capacity %llu,"
+                                   " wp sector %llu\n",
                                    n,
                                    blk_zone_type(blkz),
                                    blk_zone_type_str(blkz),
@@ -348,7 +360,11 @@ int f2fs_check_zones(int j)
                                    blk_zone_non_seq(blkz),
                                    blk_zone_sector(blkz),
                                    blk_zone_length(blkz),
+                                   blk_zone_capacity(blkz, rep->flags),
                                    blk_zone_wp_sector(blkz));
+                               dev->zone_cap_blocks[n] =
+                                       blk_zone_capacity(blkz, rep->flags) >>
+                                       (F2FS_BLKSIZE_BITS - SECTOR_SHIFT);
                        }
 
                        sector = blk_zone_sector(blkz) + blk_zone_length(blkz);
@@ -473,6 +489,33 @@ out:
        return ret;
 }
 
+uint32_t f2fs_get_usable_segments(struct f2fs_super_block *sb)
+{
+#ifdef HAVE_BLK_ZONE_REP_V2
+       int i, j;
+       uint32_t usable_segs = 0, zone_segs;
+       for (i = 0; i < c.ndevs; i++) {
+               if (c.devices[i].zoned_model != F2FS_ZONED_HM) {
+                       usable_segs += c.devices[i].total_segments;
+                       continue;
+               }
+               for (j = 0; j < c.devices[i].nr_zones; j++) {
+                       zone_segs = c.devices[i].zone_cap_blocks[j] >>
+                                       get_sb(log_blocks_per_seg);
+                       if (c.devices[i].zone_cap_blocks[j] %
+                                               DEFAULT_BLOCKS_PER_SEGMENT)
+                               usable_segs += zone_segs + 1;
+                       else
+                               usable_segs += zone_segs;
+               }
+       }
+       usable_segs -= (get_sb(main_blkaddr) - get_sb(segment0_blkaddr)) >>
+                                               get_sb(log_blocks_per_seg);
+       return usable_segs;
+#endif
+       return get_sb(segment_count_main);
+}
+
 #else
 
 int f2fs_report_zone(int i, u_int64_t UNUSED(sector), void *UNUSED(blkzone))
@@ -527,5 +570,9 @@ int f2fs_reset_zones(int i)
        return -1;
 }
 
+uint32_t f2fs_get_usable_segments(struct f2fs_super_block *sb)
+{
+       return get_sb(segment_count_main);
+}
 #endif
 
diff --git a/mkfs/f2fs_format.c b/mkfs/f2fs_format.c
index 4999cac..74a81c8 100644
--- a/mkfs/f2fs_format.c
+++ b/mkfs/f2fs_format.c
@@ -425,13 +425,19 @@ static int f2fs_prepare_super_block(void)
 
        set_sb(segment_count_main, get_sb(section_count) * c.segs_per_sec);
 
-       /* Let's determine the best reserved and overprovisioned space */
+       /*
+        * Let's determine the best reserved and overprovisioned space.
+        * For Zoned device, if zone capacity less than zone size, the segments
+        * starting after the zone capacity are unusable in each zone. So get
+        * overprovision ratio and reserved seg count based on avg usable
+        * segs_per_sec.
+        */
        if (c.overprovision == 0)
                c.overprovision = get_best_overprovision(sb);
 
        c.reserved_segments =
-                       (2 * (100 / c.overprovision + 1) + NR_CURSEG_TYPE)
-                       * c.segs_per_sec;
+                       (2 * (100 / c.overprovision + 1) + NR_CURSEG_TYPE) *
+                       (f2fs_get_usable_segments(sb) / get_sb(section_count));
 
        if (c.overprovision == 0 || c.total_segments < F2FS_MIN_SEGMENTS ||
                (c.devices[0].total_sectors *
@@ -672,19 +678,29 @@ static int f2fs_write_check_point_pack(void)
        set_cp(valid_block_count, 2 + c.quota_inum + c.quota_dnum +
                        c.lpf_inum + c.lpf_dnum);
        set_cp(rsvd_segment_count, c.reserved_segments);
-       set_cp(overprov_segment_count, (get_sb(segment_count_main) -
+
+       /*
+        * For zoned devices, if zone capacity less than zone size, get
+        * overprovision segment count based on usable segments in the device.
+        */
+       set_cp(overprov_segment_count, (f2fs_get_usable_segments(sb) -
                        get_cp(rsvd_segment_count)) *
                        c.overprovision / 100);
        set_cp(overprov_segment_count, get_cp(overprov_segment_count) +
                        get_cp(rsvd_segment_count));
 
+       if (f2fs_get_usable_segments(sb) < (get_cp(rsvd_segment_count) +
+                                       get_cp(overprov_segment_count))) {
+               MSG(0, "\tError: Not enough segments to create F2FS Volume\n");
+               goto free_nat_bits;
+       }
        MSG(0, "Info: Overprovision ratio = %.3lf%%\n", c.overprovision);
        MSG(0, "Info: Overprovision segments = %u (GC reserved = %u)\n",
                                        get_cp(overprov_segment_count),
                                        c.reserved_segments);
 
        /* main segments - reserved segments - (node + data segments) */
-       set_cp(free_segment_count, get_sb(segment_count_main) - 6);
+       set_cp(free_segment_count, f2fs_get_usable_segments(sb) - 6);
        set_cp(user_block_count, ((get_cp(free_segment_count) + 6 -
                        get_cp(overprov_segment_count)) * c.blks_per_seg));
        /* cp page (2), data summaries (1), node summaries (3) */
-- 
2.19.1



_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Reply via email to