[dm-devel] [PATCH 06/12] dm-zoned: add metadata pointer to struct dmz_dev

2020-05-22 Thread Hannes Reinecke
Add a metadata pointer to struct dmz_dev and use it as argument
for blkdev_report_zones() instead of the metadata itself.
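
For illustration, the shape of the change (a rough sketch of the pattern,
not the literal dm-zoned code; init_zone_cb stands in for dmz_init_zone):
blkdev_report_zones() hands a single void *data argument to its callback,
so passing the device and keeping a back-pointer to the shared metadata
makes both reachable:

	static int init_zone_cb(struct blk_zone *blkz, unsigned int num,
				void *data)
	{
		struct dmz_dev *dev = data;		  /* per-device argument */
		struct dmz_metadata *zmd = dev->metadata; /* shared state */

		/* ... initialize zone 'num' using both dev and zmd ... */
		return 0;
	}

	/* Caller: report the zones of one device, passing that device. */
	ret = blkdev_report_zones(dev->bdev, 0, BLK_ALL_ZONES,
				  init_zone_cb, dev);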

Signed-off-by: Hannes Reinecke 
---
 drivers/md/dm-zoned-metadata.c | 14 +-
 drivers/md/dm-zoned.h  |  7 ---
 2 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 7b6e7404f1e8..73479b4c8bca 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -1343,8 +1343,8 @@ static int dmz_load_sb(struct dmz_metadata *zmd)
  */
 static int dmz_init_zone(struct blk_zone *blkz, unsigned int num, void *data)
 {
-   struct dmz_metadata *zmd = data;
-   struct dmz_dev *dev = zmd->nr_devs > 1 ? &zmd->dev[1] : &zmd->dev[0];
+   struct dmz_dev *dev = data;
+   struct dmz_metadata *zmd = dev->metadata;
int idx = num + dev->zone_offset;
struct dm_zone *zone = kzalloc(sizeof(struct dm_zone), GFP_KERNEL);
 
@@ -1480,8 +1480,12 @@ static int dmz_init_zones(struct dmz_metadata *zmd)
 
/* Allocate zone array */
zmd->nr_zones = 0;
-   for (i = 0; i < zmd->nr_devs; i++)
-   zmd->nr_zones += zmd->dev[i].nr_zones;
+   for (i = 0; i < zmd->nr_devs; i++) {
+   struct dmz_dev *dev = &zmd->dev[i];
+
+   dev->metadata = zmd;
+   zmd->nr_zones += dev->nr_zones;
+   }
 
if (!zmd->nr_zones) {
DMERR("(%s): No zones found", zmd->devname);
@@ -1516,7 +1520,7 @@ static int dmz_init_zones(struct dmz_metadata *zmd)
 * first randomly writable zone.
 */
ret = blkdev_report_zones(zoned_dev->bdev, 0, BLK_ALL_ZONES,
- dmz_init_zone, zmd);
+ dmz_init_zone, zoned_dev);
if (ret < 0) {
DMDEBUG("(%s): Failed to report zones, error %d",
zmd->devname, ret);
diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h
index 356b436425e4..dab701893b67 100644
--- a/drivers/md/dm-zoned.h
+++ b/drivers/md/dm-zoned.h
@@ -45,11 +45,15 @@
 #define dmz_bio_block(bio) dmz_sect2blk((bio)->bi_iter.bi_sector)
 #define dmz_bio_blocks(bio)	dmz_sect2blk(bio_sectors(bio))
 
+struct dmz_metadata;
+struct dmz_reclaim;
+
 /*
  * Zoned block device information.
  */
 struct dmz_dev {
struct block_device *bdev;
+   struct dmz_metadata *metadata;
 
char		name[BDEVNAME_SIZE];
uuid_t  uuid;
@@ -168,9 +172,6 @@ enum {
 #define dmz_dev_debug(dev, format, args...)\
DMDEBUG("(%s): " format, (dev)->name, ## args)
 
-struct dmz_metadata;
-struct dmz_reclaim;
-
 /*
  * Functions defined in dm-zoned-metadata.c
  */
-- 
2.16.4

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel



[dm-devel] [PATCH 02/12] dm-zoned: convert to xarray

2020-05-22 Thread Hannes Reinecke
The zones array is getting really large, and large arrays
tend to wreak havoc with the caches.
So convert it to an xarray to become more cache-friendly.
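
For reference, the xarray lifecycle the conversion relies on looks roughly
like this (a sketch with error handling abridged; struct dm_zone is the
dm-zoned zone descriptor):

	#include <linux/xarray.h>
	#include <linux/slab.h>

	static int zones_example(void)
	{
		struct xarray zones;
		struct dm_zone *zone;
		unsigned long idx;
		int ret;

		xa_init(&zones);

		zone = kzalloc(sizeof(*zone), GFP_KERNEL);
		if (!zone)
			return -ENOMEM;
		/* xa_insert() fails with -EBUSY if the index is occupied. */
		ret = xa_insert(&zones, 42, zone, GFP_KERNEL);
		if (ret) {
			kfree(zone);
			return ret;
		}

		/* xa_load() returns NULL when no entry is present. */
		zone = xa_load(&zones, 42);

		/* Teardown: free the entries, then the index itself. */
		xa_for_each(&zones, idx, zone)
			kfree(zone);
		xa_destroy(&zones);
		return 0;
	}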

Signed-off-by: Hannes Reinecke 
---
 drivers/md/dm-zoned-metadata.c | 98 +++---
 1 file changed, 73 insertions(+), 25 deletions(-)

diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index b0d3ed4ac56a..3da6702bb1ae 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -172,7 +172,7 @@ struct dmz_metadata {
unsigned int		nr_chunks;
 
/* Zone information array */
-   struct dm_zone  *zones;
+   struct xarray   zones;
 
struct dmz_sb   sb[3];
unsigned int		mblk_primary;
@@ -327,6 +327,11 @@ unsigned int dmz_nr_unmap_seq_zones(struct dmz_metadata *zmd)
	return atomic_read(&zmd->unmap_nr_seq);
 }
 
+static struct dm_zone *dmz_get(struct dmz_metadata *zmd, unsigned int zone_id)
+{
+   return xa_load(&zmd->zones, zone_id);
+}
+
 const char *dmz_metadata_label(struct dmz_metadata *zmd)
 {
return (const char *)zmd->label;
@@ -1121,6 +1126,7 @@ static int dmz_lookup_secondary_sb(struct dmz_metadata *zmd)
 {
unsigned int zone_nr_blocks = zmd->zone_nr_blocks;
struct dmz_mblock *mblk;
+   unsigned int zone_id = zmd->sb[0].zone->id;
int i;
 
/* Allocate a block */
@@ -1133,17 +1139,16 @@ static int dmz_lookup_secondary_sb(struct dmz_metadata *zmd)
 
/* Bad first super block: search for the second one */
zmd->sb[1].block = zmd->sb[0].block + zone_nr_blocks;
-   zmd->sb[1].zone = zmd->sb[0].zone + 1;
+   zmd->sb[1].zone = xa_load(&zmd->zones, zone_id + 1);
zmd->sb[1].dev = dmz_zone_to_dev(zmd, zmd->sb[1].zone);
-   for (i = 0; i < zmd->nr_rnd_zones - 1; i++) {
+   for (i = 1; i < zmd->nr_rnd_zones; i++) {
if (dmz_read_sb(zmd, 1) != 0)
break;
-   if (le32_to_cpu(zmd->sb[1].sb->magic) == DMZ_MAGIC) {
-   zmd->sb[1].zone += i;
+   if (le32_to_cpu(zmd->sb[1].sb->magic) == DMZ_MAGIC)
return 0;
-   }
zmd->sb[1].block += zone_nr_blocks;
-   zmd->sb[1].dev = dmz_zone_to_dev(zmd, zmd->sb[1].zone + i);
+   zmd->sb[1].zone = dmz_get(zmd, zone_id + i);
+   zmd->sb[1].dev = dmz_zone_to_dev(zmd, zmd->sb[1].zone);
}
 
dmz_free_mblock(zmd, mblk);
@@ -1259,8 +1264,12 @@ static int dmz_load_sb(struct dmz_metadata *zmd)
/* Read and check secondary super block */
if (ret == 0) {
sb_good[0] = true;
-   if (!zmd->sb[1].zone)
-   zmd->sb[1].zone = zmd->sb[0].zone + zmd->nr_meta_zones;
+   if (!zmd->sb[1].zone) {
+   unsigned int zone_id =
+   zmd->sb[0].zone->id + zmd->nr_meta_zones;
+
+   zmd->sb[1].zone = dmz_get(zmd, zone_id);
+   }
zmd->sb[1].block = dmz_start_block(zmd, zmd->sb[1].zone);
zmd->sb[1].dev = dmz_zone_to_dev(zmd, zmd->sb[1].zone);
ret = dmz_get_sb(zmd, 1);
@@ -1341,7 +1350,12 @@ static int dmz_init_zone(struct blk_zone *blkz, unsigned int num, void *data)
struct dmz_metadata *zmd = data;
struct dmz_dev *dev = zmd->nr_devs > 1 ? &zmd->dev[1] : &zmd->dev[0];
int idx = num + dev->zone_offset;
-   struct dm_zone *zone = &zmd->zones[idx];
+   struct dm_zone *zone = kzalloc(sizeof(struct dm_zone), GFP_KERNEL);
+
+   if (!zone)
+   return -ENOMEM;
+   if (xa_insert(&zmd->zones, idx, zone, GFP_KERNEL))
+   return -EBUSY;
 
if (blkz->len != zmd->zone_nr_sectors) {
if (zmd->sb_version > 1) {
@@ -1397,14 +1411,18 @@ static int dmz_init_zone(struct blk_zone *blkz, unsigned int num, void *data)
return 0;
 }
 
-static void dmz_emulate_zones(struct dmz_metadata *zmd, struct dmz_dev *dev)
+static int dmz_emulate_zones(struct dmz_metadata *zmd, struct dmz_dev *dev)
 {
int idx;
sector_t zone_offset = 0;
 
for(idx = 0; idx < dev->nr_zones; idx++) {
-   struct dm_zone *zone = &zmd->zones[idx];
-
+   struct dm_zone *zone =
+   kzalloc(sizeof(struct dm_zone), GFP_KERNEL);
+   if (!zone)
+   return -ENOMEM;
+   if (xa_insert(&zmd->zones, idx, zone, GFP_KERNEL) < 0)
+   return -EBUSY;
INIT_LIST_HEAD(&zone->link);
atomic_set(&zone->refcount, 0);
zone->id = idx;
@@ -1420,6 +1438,7 @@ static void dmz_emulate_zones(struct dmz_metadata *zmd, struct dmz_dev *dev)
}
zone_offset += zmd->zone_nr_sectors;
}
+   return 0;
 }
 
 /*
@@ -1427,8 +1446,15 @@ static void dmz_emulate_zones(struct 

[dm-devel] [PATCH RFC 00/12] dm-zoned: multi-device support

2020-05-22 Thread Hannes Reinecke
Hi all,

at the risk of boring you to death, here's yet another RFC to update
dm-zoned. As it has seen only light testing and has some areas which
need to be improved, I'd consider it RFC material.
I'm just putting it out now to get some feedback and to get it ready for
the next merge window.

So, this patchset:
- Converts the zone array to an xarray for better scalability
- Separates out shared structures into per-device structures
- Lifts the restriction of 2 devices to handle an arbitrary number
  of drives.

With this patchset I'm seeing a performance increase for writes from
an average of 150MB/s (with 2 drives) to 200MB/s (with 3 drives).

Hannes Reinecke (12):
  dm-zoned: add debugging message for reading superblocks
  dm-zoned: convert to xarray
  dm-zoned: use on-stack superblock for tertiary devices
  dm-zoned: secondary superblock must reside on the same device as the
    primary superblock
  dm-zoned: add device pointer to struct dm_zone
  dm-zoned: add metadata pointer to struct dmz_dev
  dm-zoned: add a 'reserved' zone flag
  dm-zoned: move random and sequential zones into struct dmz_dev
  dm-zoned: improve logging messages for reclaim
  dm-zoned: support arbitrary number of devices
  dm-zoned: round-robin load balancer for reclaiming zones
  dm-zoned: per-device reclaim

 drivers/md/dm-zoned-metadata.c | 430 -
 drivers/md/dm-zoned-reclaim.c  |  85 
 drivers/md/dm-zoned-target.c   | 172 ++---
 drivers/md/dm-zoned.h  |  70 ---
 4 files changed, 454 insertions(+), 303 deletions(-)

-- 
2.16.4

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel



[dm-devel] [PATCH 01/12] dm-zoned: add debugging message for reading superblocks

2020-05-22 Thread Hannes Reinecke
Signed-off-by: Hannes Reinecke 
---
 drivers/md/dm-zoned-metadata.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 4a2e351365c5..b0d3ed4ac56a 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -1105,6 +1105,9 @@ static int dmz_check_sb(struct dmz_metadata *zmd, unsigned int set)
  */
 static int dmz_read_sb(struct dmz_metadata *zmd, unsigned int set)
 {
+   DMDEBUG("(%s): read superblock set %d dev %s block %llu",
+   zmd->devname, set, zmd->sb[set].dev->name,
+   zmd->sb[set].block);
return dmz_rdwr_block(zmd->sb[set].dev, REQ_OP_READ,
  zmd->sb[set].block, zmd->sb[set].mblk->page);
 }
-- 
2.16.4

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel



[dm-devel] [PATCH 09/12] dm-zoned: improve logging messages for reclaim

2020-05-22 Thread Hannes Reinecke
Instead of just reporting the errno, this patch adds more verbose
debugging messages in the reclaim path.

Signed-off-by: Hannes Reinecke 
---
 drivers/md/dm-zoned-reclaim.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c
index d1a72b42dea2..fba0d48e38a7 100644
--- a/drivers/md/dm-zoned-reclaim.c
+++ b/drivers/md/dm-zoned-reclaim.c
@@ -367,8 +367,11 @@ static int dmz_do_reclaim(struct dmz_reclaim *zrc)
 
/* Get a data zone */
dzone = dmz_get_zone_for_reclaim(zmd, dmz_target_idle(zrc));
-   if (!dzone)
+   if (!dzone) {
+   DMDEBUG("(%s): No zone found to reclaim",
+   dmz_metadata_label(zmd));
return -EBUSY;
+   }
 
start = jiffies;
if (dmz_is_cache(dzone) || dmz_is_rnd(dzone)) {
@@ -412,6 +415,12 @@ static int dmz_do_reclaim(struct dmz_reclaim *zrc)
}
 out:
if (ret) {
+   if (ret == -EINTR)
+   DMDEBUG("(%s): reclaim zone %u interrupted",
+   dmz_metadata_label(zmd), rzone->id);
+   else
+   DMDEBUG("(%s): Failed to reclaim zone %u, err %d",
+   dmz_metadata_label(zmd), rzone->id, ret);
dmz_unlock_zone_reclaim(dzone);
return ret;
}
@@ -515,8 +524,6 @@ static void dmz_reclaim_work(struct work_struct *work)
 
ret = dmz_do_reclaim(zrc);
if (ret && ret != -EINTR) {
-   DMDEBUG("(%s): Reclaim error %d",
-   dmz_metadata_label(zmd), ret);
if (!dmz_check_dev(zmd))
return;
}
-- 
2.16.4

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel



[dm-devel] [PATCH 07/12] dm-zoned: add a 'reserved' zone flag

2020-05-22 Thread Hannes Reinecke
Instead of counting the number of reserved zones in dmz_free_zone(),
we should mark the zone as 'reserved' during allocation and simplify
dmz_free_zone().

Signed-off-by: Hannes Reinecke 
---
 drivers/md/dm-zoned-metadata.c | 4 ++--
 drivers/md/dm-zoned.h  | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 73479b4c8bca..1b9da698a812 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -1783,6 +1783,7 @@ static int dmz_load_mapping(struct dmz_metadata *zmd)
atomic_inc(&zmd->unmap_nr_rnd);
} else if (atomic_read(&zmd->nr_reserved_seq_zones) < zmd->nr_reserved_seq) {
list_add_tail(&dzone->link, &zmd->reserved_seq_zones_list);
+   set_bit(DMZ_RESERVED, &dzone->flags);
atomic_inc(&zmd->nr_reserved_seq_zones);
zmd->nr_seq--;
} else {
@@ -2210,8 +2211,7 @@ void dmz_free_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
} else if (dmz_is_rnd(zone)) {
list_add_tail(&zone->link, &zmd->unmap_rnd_list);
atomic_inc(&zmd->unmap_nr_rnd);
-   } else if (atomic_read(&zmd->nr_reserved_seq_zones) <
-  zmd->nr_reserved_seq) {
+   } else if (dmz_is_reserved(zone)) {
list_add_tail(&zone->link, &zmd->reserved_seq_zones_list);
atomic_inc(&zmd->nr_reserved_seq_zones);
} else {
diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h
index dab701893b67..983f5b5e9fa0 100644
--- a/drivers/md/dm-zoned.h
+++ b/drivers/md/dm-zoned.h
@@ -130,6 +130,7 @@ enum {
DMZ_META,
DMZ_DATA,
DMZ_BUF,
+   DMZ_RESERVED,
 
/* Zone internal state */
DMZ_RECLAIM,
@@ -147,6 +148,7 @@ enum {
 #define dmz_is_offline(z)  test_bit(DMZ_OFFLINE, &(z)->flags)
 #define dmz_is_readonly(z) test_bit(DMZ_READ_ONLY, &(z)->flags)
 #define dmz_in_reclaim(z)  test_bit(DMZ_RECLAIM, &(z)->flags)
+#define dmz_is_reserved(z) test_bit(DMZ_RESERVED, &(z)->flags)
 #define dmz_seq_write_err(z)   test_bit(DMZ_SEQ_WRITE_ERR, &(z)->flags)
 #define dmz_reclaim_should_terminate(z) \
test_bit(DMZ_RECLAIM_TERMINATE, &(z)->flags)
-- 
2.16.4

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel



[dm-devel] [PATCH 05/12] dm-zoned: add device pointer to struct dm_zone

2020-05-22 Thread Hannes Reinecke
Add a pointer to the containing device to struct dm_zone and
kill dmz_zone_to_dev().

Signed-off-by: Hannes Reinecke 
---
 drivers/md/dm-zoned-metadata.c | 47 --
 drivers/md/dm-zoned-reclaim.c  | 18 +++-
 drivers/md/dm-zoned-target.c   |  7 +++
 drivers/md/dm-zoned.h  |  4 +++-
 4 files changed, 26 insertions(+), 50 deletions(-)

diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index fdae4e0228e7..7b6e7404f1e8 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -229,16 +229,10 @@ struct dmz_metadata {
  */
 static unsigned int dmz_dev_zone_id(struct dmz_metadata *zmd, struct dm_zone *zone)
 {
-   unsigned int zone_id;
-
if (WARN_ON(!zone))
return 0;
 
-   zone_id = zone->id;
-   if (zmd->nr_devs > 1 &&
-   (zone_id >= zmd->dev[1].zone_offset))
-   zone_id -= zmd->dev[1].zone_offset;
-   return zone_id;
+   return zone->id - zone->dev->zone_offset;
 }
 
 sector_t dmz_start_sect(struct dmz_metadata *zmd, struct dm_zone *zone)
@@ -255,18 +249,6 @@ sector_t dmz_start_block(struct dmz_metadata *zmd, struct dm_zone *zone)
return (sector_t)zone_id << zmd->zone_nr_blocks_shift;
 }
 
-struct dmz_dev *dmz_zone_to_dev(struct dmz_metadata *zmd, struct dm_zone *zone)
-{
-   if (WARN_ON(!zone))
-   return &zmd->dev[0];
-
-   if (zmd->nr_devs > 1 &&
-   zone->id >= zmd->dev[1].zone_offset)
-   return &zmd->dev[1];
-
-   return &zmd->dev[0];
-}
-
 unsigned int dmz_zone_nr_blocks(struct dmz_metadata *zmd)
 {
return zmd->zone_nr_blocks;
@@ -1252,7 +1234,7 @@ static int dmz_load_sb(struct dmz_metadata *zmd)
 
/* Read and check the primary super block */
zmd->sb[0].block = dmz_start_block(zmd, zmd->sb[0].zone);
-   zmd->sb[0].dev = dmz_zone_to_dev(zmd, zmd->sb[0].zone);
+   zmd->sb[0].dev = zmd->sb[0].zone->dev;
+   ret = dmz_get_sb(zmd, &zmd->sb[0], 0);
if (ret) {
dmz_dev_err(zmd->sb[0].dev, "Read primary super block failed");
@@ -1383,6 +1365,7 @@ static int dmz_init_zone(struct blk_zone *blkz, unsigned int num, void *data)
 
INIT_LIST_HEAD(&zone->link);
atomic_set(&zone->refcount, 0);
+   zone->dev = dev;
zone->id = idx;
zone->chunk = DMZ_MAP_UNMAPPED;
 
@@ -1442,6 +1425,7 @@ static int dmz_emulate_zones(struct dmz_metadata *zmd, struct dmz_dev *dev)
return -EBUSY;
INIT_LIST_HEAD(&zone->link);
atomic_set(&zone->refcount, 0);
+   zone->dev = dev;
zone->id = idx;
zone->chunk = DMZ_MAP_UNMAPPED;
set_bit(DMZ_CACHE, &zone->flags);
@@ -1567,11 +1551,10 @@ static int dmz_update_zone_cb(struct blk_zone *blkz, unsigned int idx,
  */
 static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
 {
-   struct dmz_dev *dev = dmz_zone_to_dev(zmd, zone);
unsigned int noio_flag;
int ret;
 
-   if (dev->flags & DMZ_BDEV_REGULAR)
+   if (zone->dev->flags & DMZ_BDEV_REGULAR)
return 0;
 
/*
@@ -1581,16 +1564,16 @@ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
 * GFP_NOIO was specified.
 */
noio_flag = memalloc_noio_save();
-   ret = blkdev_report_zones(dev->bdev, dmz_start_sect(zmd, zone), 1,
+   ret = blkdev_report_zones(zone->dev->bdev, dmz_start_sect(zmd, zone), 1,
  dmz_update_zone_cb, zone);
memalloc_noio_restore(noio_flag);
 
if (ret == 0)
ret = -EIO;
if (ret < 0) {
-   dmz_dev_err(dev, "Get zone %u report failed",
+   dmz_dev_err(zone->dev, "Get zone %u report failed",
zone->id);
-   dmz_check_bdev(dev);
+   dmz_check_bdev(zone->dev);
return ret;
}
 
@@ -1604,7 +1587,6 @@ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
 static int dmz_handle_seq_write_err(struct dmz_metadata *zmd,
struct dm_zone *zone)
 {
-   struct dmz_dev *dev = dmz_zone_to_dev(zmd, zone);
unsigned int wp = 0;
int ret;
 
@@ -1613,7 +1595,8 @@ static int dmz_handle_seq_write_err(struct dmz_metadata *zmd,
if (ret)
return ret;
 
-   dmz_dev_warn(dev, "Processing zone %u write error (zone wp %u/%u)",
+   dmz_dev_warn(zone->dev,
+"Processing zone %u write error (zone wp %u/%u)",
 zone->id, zone->wp_block, wp);
 
if (zone->wp_block < wp) {
@@ -1641,13 +1624,11 @@ static int dmz_reset_zone(struct dmz_metadata *zmd, struct dm_zone *zone)
return 0;
 
if (!dmz_is_empty(zone) || dmz_seq_write_err(zone)) {
-   struct dmz_dev *dev = dmz_zone_to_dev(zmd, zone);
-
-   ret = 

[dm-devel] [PATCH 11/12] dm-zoned: round-robin load balancer for reclaiming zones

2020-05-22 Thread Hannes Reinecke
When reclaiming zones we should arbitrate between the zoned
devices to get better throughput. So implement a simple
round-robin load balancer between the zoned devices.
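
The balancing itself is tiny; in outline (a simplified sketch of the idea,
where alloc_zone_on_dev() is a hypothetical stand-in for the per-device
zone list handling):

	static struct dm_zone *alloc_zone_rr(struct dmz_metadata *zmd)
	{
		unsigned int i, dev_idx = zmd->last_alloc_idx;
		struct dm_zone *zone;

		for (i = 0; i < zmd->nr_devs; i++) {
			zone = alloc_zone_on_dev(zmd, dev_idx);
			if (zone) {
				/* remember where to start next time */
				zmd->last_alloc_idx = (dev_idx + 1) % zmd->nr_devs;
				return zone;
			}
			dev_idx = (dev_idx + 1) % zmd->nr_devs;
		}
		return NULL;
	}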

Signed-off-by: Hannes Reinecke 
---
 drivers/md/dm-zoned-metadata.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 87784e7785bc..25dcad2a565f 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -171,6 +171,8 @@ struct dmz_metadata {
unsigned int		nr_reserved_seq;
unsigned int		nr_chunks;
 
+   unsigned int		last_alloc_idx;
+
/* Zone information array */
struct xarray   zones;
 
@@ -2178,7 +2180,7 @@ struct dm_zone *dmz_alloc_zone(struct dmz_metadata *zmd, unsigned long flags)
 {
struct list_head *list;
struct dm_zone *zone;
-   unsigned int dev_idx = 0;
+   unsigned int dev_idx = zmd->last_alloc_idx;
 
 again:
if (flags & DMZ_ALLOC_CACHE)
@@ -2214,6 +2216,9 @@ struct dm_zone *dmz_alloc_zone(struct dmz_metadata *zmd, unsigned long flags)
zone = list_first_entry(list, struct dm_zone, link);
list_del_init(&zone->link);
 
+   if (!(flags & DMZ_ALLOC_CACHE))
+   zmd->last_alloc_idx = (dev_idx + 1) % zmd->nr_devs;
+
if (dmz_is_cache(zone))
atomic_dec(&zmd->unmap_nr_cache);
else if (dmz_is_rnd(zone))
@@ -2839,6 +2844,7 @@ int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev,
zmd->dev = dev;
zmd->nr_devs = num_dev;
zmd->mblk_rbtree = RB_ROOT;
+   zmd->last_alloc_idx = 0;
init_rwsem(&zmd->mblk_sem);
mutex_init(&zmd->mblk_flush_lock);
spin_lock_init(&zmd->mblk_lock);
-- 
2.16.4

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel



[dm-devel] [PATCH 10/12] dm-zoned: support arbitrary number of devices

2020-05-22 Thread Hannes Reinecke
Remove the hard-coded limit of two devices and support an unlimited
number of additional zoned devices.
With that we need to increase the device-mapper version number to
3.0.0 as we've modified the interface.

Signed-off-by: Hannes Reinecke 
---
 drivers/md/dm-zoned-metadata.c |  68 +++---
 drivers/md/dm-zoned-reclaim.c  |  28 ++---
 drivers/md/dm-zoned-target.c   | 129 +
 drivers/md/dm-zoned.h  |   9 +--
 4 files changed, 139 insertions(+), 95 deletions(-)

diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 5f44970a6187..87784e7785bc 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -260,6 +260,11 @@ unsigned int dmz_zone_nr_sectors_shift(struct dmz_metadata *zmd)
return zmd->zone_nr_sectors_shift;
 }
 
+unsigned int dmz_nr_devs(struct dmz_metadata *zmd)
+{
+   return zmd->nr_devs;
+}
+
 unsigned int dmz_nr_zones(struct dmz_metadata *zmd)
 {
return zmd->nr_zones;
@@ -270,24 +275,14 @@ unsigned int dmz_nr_chunks(struct dmz_metadata *zmd)
return zmd->nr_chunks;
 }
 
-unsigned int dmz_nr_rnd_zones(struct dmz_metadata *zmd)
+unsigned int dmz_nr_rnd_zones(struct dmz_metadata *zmd, int idx)
 {
-   unsigned int nr_rnd_zones = 0;
-   int i;
-
-   for (i = 0; i < zmd->nr_devs; i++)
-   nr_rnd_zones += zmd->dev[i].nr_rnd;
-   return nr_rnd_zones;
+   return zmd->dev[idx].nr_rnd;
 }
 
-unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd)
+unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd, int idx)
 {
-   unsigned int nr_unmap_rnd_zones = 0;
-   int i;
-
-   for (i = 0; i < zmd->nr_devs; i++)
-   nr_unmap_rnd_zones += atomic_read(&zmd->dev[i].unmap_nr_rnd);
-   return nr_unmap_rnd_zones;
+   return atomic_read(&zmd->dev[idx].unmap_nr_rnd);
 }
 
 unsigned int dmz_nr_cache_zones(struct dmz_metadata *zmd)
@@ -300,24 +295,14 @@ unsigned int dmz_nr_unmap_cache_zones(struct dmz_metadata *zmd)
return atomic_read(&zmd->unmap_nr_cache);
 }
 
-unsigned int dmz_nr_seq_zones(struct dmz_metadata *zmd)
+unsigned int dmz_nr_seq_zones(struct dmz_metadata *zmd, int idx)
 {
-   unsigned int nr_seq_zones = 0;
-   int i;
-
-   for (i = 0; i < zmd->nr_devs; i++)
-   nr_seq_zones += zmd->dev[i].nr_seq;
-   return nr_seq_zones;
+   return zmd->dev[idx].nr_seq;
 }
 
-unsigned int dmz_nr_unmap_seq_zones(struct dmz_metadata *zmd)
+unsigned int dmz_nr_unmap_seq_zones(struct dmz_metadata *zmd, int idx)
 {
-   unsigned int nr_unmap_seq_zones = 0;
-   int i;
-
-   for (i = 0; i < zmd->nr_devs; i++)
-   nr_unmap_seq_zones += atomic_read(&zmd->dev[i].unmap_nr_seq);
-   return nr_unmap_seq_zones;
+   return atomic_read(&zmd->dev[idx].unmap_nr_seq);
 }
 
 static struct dm_zone *dmz_get(struct dmz_metadata *zmd, unsigned int zone_id)
@@ -1530,7 +1515,20 @@ static int dmz_init_zones(struct dmz_metadata *zmd)
 */
zmd->sb[0].zone = dmz_get(zmd, 0);
 
-   zoned_dev = &zmd->dev[1];
+   for (i = 1; i < zmd->nr_devs; i++) {
+   zoned_dev = &zmd->dev[i];
+
+   ret = blkdev_report_zones(zoned_dev->bdev, 0,
+ BLK_ALL_ZONES,
+ dmz_init_zone, zoned_dev);
+   if (ret < 0) {
+   DMDEBUG("(%s): Failed to report zones, error 
%d",
+   zmd->devname, ret);
+   dmz_drop_zones(zmd);
+   return ret;
+   }
+   }
+   return 0;
}
 
/*
@@ -2921,10 +2919,14 @@ int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev,
  zmd->nr_data_zones, zmd->nr_chunks);
dmz_zmd_debug(zmd, "%u cache zones (%u unmapped)",
zmd->nr_cache, atomic_read(&zmd->unmap_nr_cache));
-   dmz_zmd_debug(zmd, "%u random zones (%u unmapped)",
- dmz_nr_rnd_zones(zmd), dmz_nr_unmap_rnd_zones(zmd));
-   dmz_zmd_debug(zmd, "%u sequential zones (%u unmapped)",
- dmz_nr_seq_zones(zmd), dmz_nr_unmap_seq_zones(zmd));
+   for (i = 0; i < zmd->nr_devs; i++) {
+   dmz_zmd_debug(zmd, "%u random zones (%u unmapped)",
+ dmz_nr_rnd_zones(zmd, i),
+ dmz_nr_unmap_rnd_zones(zmd, i));
+   dmz_zmd_debug(zmd, "%u sequential zones (%u unmapped)",
+ dmz_nr_seq_zones(zmd, i),
+ dmz_nr_unmap_seq_zones(zmd, i));
+   }
dmz_zmd_debug(zmd, "  %u reserved sequential data zones",
  zmd->nr_reserved_seq);
dmz_zmd_debug(zmd, "Format:");
diff --git a/drivers/md/dm-zoned-reclaim.c 

[dm-devel] [PATCH 12/12] dm-zoned: per-device reclaim

2020-05-22 Thread Hannes Reinecke
Instead of having one reclaim workqueue for the entire set we should
be allocating a reclaim workqueue per device; that will reduce
contention and should boost performance for a multi-device setup.
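
In outline, the constructor side then does something like this per device
(a sketch; zrc->work, zrc->wq and the workqueue naming follow existing
dm-zoned conventions, while label and nr_devs are assumed to be in scope):

	struct dmz_reclaim *zrc;
	int i;

	for (i = 0; i < nr_devs; i++) {
		zrc = kzalloc(sizeof(*zrc), GFP_KERNEL);
		if (!zrc)
			return -ENOMEM;

		zrc->dev_idx = i;
		INIT_DELAYED_WORK(&zrc->work, dmz_reclaim_work);

		/* One ordered workqueue per device instead of one global. */
		zrc->wq = alloc_ordered_workqueue("dmz_rwq_%s_%d",
						  WQ_MEM_RECLAIM, label, i);
		if (!zrc->wq) {
			kfree(zrc);
			return -ENOMEM;
		}
		mod_delayed_work(zrc->wq, &zrc->work, 0);
	}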

Signed-off-by: Hannes Reinecke 
---
 drivers/md/dm-zoned-reclaim.c | 70 +--
 drivers/md/dm-zoned-target.c  | 36 +-
 drivers/md/dm-zoned.h | 38 ---
 3 files changed, 76 insertions(+), 68 deletions(-)

diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c
index f2e053b5f2db..6f3d8f18b989 100644
--- a/drivers/md/dm-zoned-reclaim.c
+++ b/drivers/md/dm-zoned-reclaim.c
@@ -21,6 +21,8 @@ struct dmz_reclaim {
struct dm_kcopyd_throttle kc_throttle;
int kc_err;
 
+   int dev_idx;
+
unsigned long   flags;
 
/* Last target access time */
@@ -197,8 +199,8 @@ static int dmz_reclaim_buf(struct dmz_reclaim *zrc, struct dm_zone *dzone)
struct dmz_metadata *zmd = zrc->metadata;
int ret;
 
-   DMDEBUG("(%s): Chunk %u, move buf zone %u (weight %u) to data zone %u 
(weight %u)",
-   dmz_metadata_label(zmd),
+   DMDEBUG("(%s/%u): Chunk %u, move buf zone %u (weight %u) to data zone 
%u (weight %u)",
+   dmz_metadata_label(zmd), zrc->dev_idx,
dzone->chunk, bzone->id, dmz_weight(bzone),
dzone->id, dmz_weight(dzone));
 
@@ -236,8 +238,8 @@ static int dmz_reclaim_seq_data(struct dmz_reclaim *zrc, struct dm_zone *dzone)
struct dmz_metadata *zmd = zrc->metadata;
int ret = 0;
 
-   DMDEBUG("(%s): Chunk %u, move data zone %u (weight %u) to buf zone %u 
(weight %u)",
-   dmz_metadata_label(zmd),
+   DMDEBUG("(%s/%u): Chunk %u, move data zone %u (weight %u) to buf zone 
%u (weight %u)",
+   dmz_metadata_label(zmd), zrc->dev_idx,
chunk, dzone->id, dmz_weight(dzone),
bzone->id, dmz_weight(bzone));
 
@@ -294,8 +296,8 @@ static int dmz_reclaim_rnd_data(struct dmz_reclaim *zrc, struct dm_zone *dzone)
if (!szone)
return -ENOSPC;
 
-   DMDEBUG("(%s): Chunk %u, move %s zone %u (weight %u) to %s zone %u",
-   dmz_metadata_label(zmd), chunk,
+   DMDEBUG("(%s/%u): Chunk %u, move %s zone %u (weight %u) to %s zone %u",
+   dmz_metadata_label(zmd), zrc->dev_idx, chunk,
dmz_is_cache(dzone) ? "cache" : "rnd",
dzone->id, dmz_weight(dzone),
dmz_is_rnd(szone) ? "rnd" : "seq", szone->id);
@@ -368,8 +370,8 @@ static int dmz_do_reclaim(struct dmz_reclaim *zrc)
/* Get a data zone */
dzone = dmz_get_zone_for_reclaim(zmd, dmz_target_idle(zrc));
if (!dzone) {
-   DMDEBUG("(%s): No zone found to reclaim",
-   dmz_metadata_label(zmd));
+   DMDEBUG("(%s/%u): No zone found to reclaim",
+   dmz_metadata_label(zmd), zrc->dev_idx);
return -EBUSY;
}
 
@@ -416,24 +418,26 @@ static int dmz_do_reclaim(struct dmz_reclaim *zrc)
 out:
if (ret) {
if (ret == -EINTR)
-   DMDEBUG("(%s): reclaim zone %u interrupted",
-   dmz_metadata_label(zmd), rzone->id);
+   DMDEBUG("(%s/%u): reclaim zone %u interrupted",
+   dmz_metadata_label(zmd), zrc->dev_idx,
+   rzone->id);
else
-   DMDEBUG("(%s): Failed to reclaim zone %u, err %d",
-   dmz_metadata_label(zmd), rzone->id, ret);
+   DMDEBUG("(%s/%u): Failed to reclaim zone %u, err %d",
+   dmz_metadata_label(zmd), zrc->dev_idx,
+   rzone->id, ret);
dmz_unlock_zone_reclaim(dzone);
return ret;
}
 
ret = dmz_flush_metadata(zrc->metadata);
if (ret) {
-   DMDEBUG("(%s): Metadata flush for zone %u failed, err %d",
-   dmz_metadata_label(zmd), rzone->id, ret);
+   DMDEBUG("(%s/%u): Metadata flush for zone %u failed, err %d",
+   dmz_metadata_label(zmd), zrc->dev_idx, rzone->id, ret);
return ret;
}
 
-   DMDEBUG("(%s): Reclaimed zone %u in %u ms",
-   dmz_metadata_label(zmd),
+   DMDEBUG("(%s/%u): Reclaimed zone %u in %u ms",
+   dmz_metadata_label(zmd), zrc->dev_idx,
rzone->id, jiffies_to_msecs(jiffies - start));
return 0;
 }
@@ -448,12 +452,8 @@ static unsigned int dmz_reclaim_percentage(struct dmz_reclaim *zrc)
nr_zones = nr_cache;
nr_unmap = dmz_nr_unmap_cache_zones(zmd);
} else {
-   int i;
-
-   for (i = 0; i < dmz_nr_devs(zmd); i++) {
-

[dm-devel] [PATCH 08/12] dm-zoned: move random and sequential zones into struct dmz_dev

2020-05-22 Thread Hannes Reinecke
Random and sequential zones should be part of the respective
device structure to make arbitration between devices possible.
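
Concretely, the bookkeeping removed from struct dmz_metadata below ends up
per device; struct dmz_dev grows roughly these fields (reconstructed from
the removals in this patch, as the dm-zoned.h hunk is not shown in full):

	struct dmz_dev {
		/* ... existing fields ... */

		/* Random zone bookkeeping, now per device */
		unsigned int		nr_rnd;
		atomic_t		unmap_nr_rnd;
		struct list_head	unmap_rnd_list;
		struct list_head	map_rnd_list;

		/* Sequential zone bookkeeping, now per device */
		unsigned int		nr_seq;
		atomic_t		unmap_nr_seq;
		struct list_head	unmap_seq_list;
		struct list_head	map_seq_list;
	};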

Signed-off-by: Hannes Reinecke 
---
 drivers/md/dm-zoned-metadata.c | 143 +
 drivers/md/dm-zoned.h  |  10 +++
 2 files changed, 99 insertions(+), 54 deletions(-)

diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 1b9da698a812..5f44970a6187 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -192,21 +192,12 @@ struct dmz_metadata {
/* Zone allocation management */
struct mutex		map_lock;
struct dmz_mblock	**map_mblk;
-   unsigned int		nr_rnd;
-   atomic_t		unmap_nr_rnd;
-   struct list_head	unmap_rnd_list;
-   struct list_head	map_rnd_list;
 
unsigned int		nr_cache;
atomic_t		unmap_nr_cache;
struct list_head	unmap_cache_list;
struct list_head	map_cache_list;
 
-   unsigned int		nr_seq;
-   atomic_t		unmap_nr_seq;
-   struct list_head	unmap_seq_list;
-   struct list_head	map_seq_list;
-
atomic_t		nr_reserved_seq_zones;
struct list_head	reserved_seq_zones_list;
 
@@ -281,12 +272,22 @@ unsigned int dmz_nr_chunks(struct dmz_metadata *zmd)
 
 unsigned int dmz_nr_rnd_zones(struct dmz_metadata *zmd)
 {
-   return zmd->nr_rnd;
+   unsigned int nr_rnd_zones = 0;
+   int i;
+
+   for (i = 0; i < zmd->nr_devs; i++)
+   nr_rnd_zones += zmd->dev[i].nr_rnd;
+   return nr_rnd_zones;
 }
 
 unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd)
 {
-   return atomic_read(&zmd->unmap_nr_rnd);
+   unsigned int nr_unmap_rnd_zones = 0;
+   int i;
+
+   for (i = 0; i < zmd->nr_devs; i++)
+   nr_unmap_rnd_zones += atomic_read(&zmd->dev[i].unmap_nr_rnd);
+   return nr_unmap_rnd_zones;
 }
 
 unsigned int dmz_nr_cache_zones(struct dmz_metadata *zmd)
@@ -301,12 +302,22 @@ unsigned int dmz_nr_unmap_cache_zones(struct dmz_metadata *zmd)
 
 unsigned int dmz_nr_seq_zones(struct dmz_metadata *zmd)
 {
-   return zmd->nr_seq;
+   unsigned int nr_seq_zones = 0;
+   int i;
+
+   for (i = 0; i < zmd->nr_devs; i++)
+   nr_seq_zones += zmd->dev[i].nr_seq;
+   return nr_seq_zones;
 }
 
 unsigned int dmz_nr_unmap_seq_zones(struct dmz_metadata *zmd)
 {
-   return atomic_read(&zmd->unmap_nr_seq);
+   unsigned int nr_unmap_seq_zones = 0;
+   int i;
+
+   for (i = 0; i < zmd->nr_devs; i++)
+   nr_unmap_seq_zones += atomic_read(&zmd->dev[i].unmap_nr_seq);
+   return nr_unmap_seq_zones;
 }
 
 static struct dm_zone *dmz_get(struct dmz_metadata *zmd, unsigned int zone_id)
@@ -1485,6 +1496,14 @@ static int dmz_init_zones(struct dmz_metadata *zmd)
 
dev->metadata = zmd;
zmd->nr_zones += dev->nr_zones;
+
+   atomic_set(&dev->unmap_nr_rnd, 0);
+   INIT_LIST_HEAD(&dev->unmap_rnd_list);
+   INIT_LIST_HEAD(&dev->map_rnd_list);
+
+   atomic_set(&dev->unmap_nr_seq, 0);
+   INIT_LIST_HEAD(&dev->unmap_seq_list);
+   INIT_LIST_HEAD(&dev->map_seq_list);
}
 
if (!zmd->nr_zones) {
@@ -1702,9 +1721,9 @@ static int dmz_load_mapping(struct dmz_metadata *zmd)
if (dmz_is_cache(dzone))
list_add_tail(&dzone->link, &zmd->map_cache_list);
else if (dmz_is_rnd(dzone))
-   list_add_tail(&dzone->link, &zmd->map_rnd_list);
+   list_add_tail(&dzone->link, &dzone->dev->map_rnd_list);
else
-   list_add_tail(&dzone->link, &zmd->map_seq_list);
+   list_add_tail(&dzone->link, &dzone->dev->map_seq_list);
 
/* Check buffer zone */
bzone_id = le32_to_cpu(dmap[e].bzone_id);
@@ -1738,7 +1757,7 @@ static int dmz_load_mapping(struct dmz_metadata *zmd)
if (dmz_is_cache(bzone))
list_add_tail(&bzone->link, &zmd->map_cache_list);
else
-   list_add_tail(&bzone->link, &zmd->map_rnd_list);
+   list_add_tail(&bzone->link, &bzone->dev->map_rnd_list);
 next:
chunk++;
e++;
@@ -1763,9 +1782,9 @@ static int dmz_load_mapping(struct dmz_metadata *zmd)
if (dmz_is_cache(dzone))
zmd->nr_cache++;
else if (dmz_is_rnd(dzone))
-   zmd->nr_rnd++;
+   dzone->dev->nr_rnd++;
else
-   zmd->nr_seq++;
+   dzone->dev->nr_seq++;
 
if (dmz_is_data(dzone)) {
/* Already initialized */
@@ -1779,16 +1798,18 @@ static int dmz_load_mapping(struct dmz_metadata *zmd)
list_add_tail(&dzone->link, &zmd->unmap_cache_list);

[dm-devel] [PATCH 04/12] dm-zoned: secondary superblock must reside on the same device as the primary superblock

2020-05-22 Thread Hannes Reinecke
The secondary superblock must reside on the same device as the
primary superblock, so there's no need to re-calculate the device.

Signed-off-by: Hannes Reinecke 
---
 drivers/md/dm-zoned-metadata.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index b70a988fa771..fdae4e0228e7 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -1141,7 +1141,7 @@ static int dmz_lookup_secondary_sb(struct dmz_metadata *zmd)
/* Bad first super block: search for the second one */
zmd->sb[1].block = zmd->sb[0].block + zone_nr_blocks;
zmd->sb[1].zone = xa_load(&zmd->zones, zone_id + 1);
-   zmd->sb[1].dev = dmz_zone_to_dev(zmd, zmd->sb[1].zone);
+   zmd->sb[1].dev = zmd->sb[0].dev;
for (i = 1; i < zmd->nr_rnd_zones; i++) {
if (dmz_read_sb(zmd, &zmd->sb[1], 1) != 0)
break;
@@ -1149,7 +1149,6 @@ static int dmz_lookup_secondary_sb(struct dmz_metadata *zmd)
return 0;
zmd->sb[1].block += zone_nr_blocks;
zmd->sb[1].zone = dmz_get(zmd, zone_id + i);
-   zmd->sb[1].dev = dmz_zone_to_dev(zmd, zmd->sb[1].zone);
}
 
dmz_free_mblock(zmd, mblk);
@@ -1272,7 +1271,7 @@ static int dmz_load_sb(struct dmz_metadata *zmd)
zmd->sb[1].zone = dmz_get(zmd, zone_id);
}
zmd->sb[1].block = dmz_start_block(zmd, zmd->sb[1].zone);
-   zmd->sb[1].dev = dmz_zone_to_dev(zmd, zmd->sb[1].zone);
+   zmd->sb[1].dev = zmd->sb[0].dev;
ret = dmz_get_sb(zmd, &zmd->sb[1], 1);
} else
ret = dmz_lookup_secondary_sb(zmd);
-- 
2.16.4

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel



[dm-devel] [PATCH 03/12] dm-zoned: use on-stack superblock for tertiary devices

2020-05-22 Thread Hannes Reinecke
Checking the tertiary superblock just consists of validating UUIDs,
CRCs, and the generation number; it doesn't have contents which
would be required during actual operation.
So we should use an on-stack superblock and avoid having to store
it together with the 'real' superblocks.
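
The resulting usage for a tertiary device is then roughly (a sketch using
the dmz_get_sb()/dmz_check_sb() signatures introduced below; the block
location and the helper name are assumptions, not the literal patch):

	static int check_tertiary_sb(struct dmz_metadata *zmd,
				     struct dmz_dev *dev)
	{
		struct dmz_sb sb;	/* lives only for this check */
		int ret;

		sb.block = 0;		/* assumed superblock location */
		sb.zone = NULL;
		sb.dev = dev;

		ret = dmz_get_sb(zmd, &sb, 2);		/* set 2: tertiary */
		if (ret)
			return ret;
		ret = dmz_check_sb(zmd, &sb, true);	/* tertiary check */
		dmz_free_mblock(zmd, sb.mblk);		/* nothing is kept */
		return ret;
	}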

Signed-off-by: Hannes Reinecke 
---
 drivers/md/dm-zoned-metadata.c | 98 +++---
 1 file changed, 53 insertions(+), 45 deletions(-)

diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index 3da6702bb1ae..b70a988fa771 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -174,7 +174,7 @@ struct dmz_metadata {
/* Zone information array */
struct xarray   zones;
 
-   struct dmz_sb   sb[3];
+   struct dmz_sb   sb[2];
unsigned int		mblk_primary;
unsigned int		sb_version;
u64 sb_gen;
@@ -995,10 +995,11 @@ int dmz_flush_metadata(struct dmz_metadata *zmd)
 /*
  * Check super block.
  */
-static int dmz_check_sb(struct dmz_metadata *zmd, unsigned int set)
+static int dmz_check_sb(struct dmz_metadata *zmd, struct dmz_sb *dsb,
+   bool tertiary)
 {
-   struct dmz_super *sb = zmd->sb[set].sb;
-   struct dmz_dev *dev = zmd->sb[set].dev;
+   struct dmz_super *sb = dsb->sb;
+   struct dmz_dev *dev = dsb->dev;
unsigned int nr_meta_zones, nr_data_zones;
u32 crc, stored_crc;
u64 gen;
@@ -1015,7 +1016,7 @@ static int dmz_check_sb(struct dmz_metadata *zmd, unsigned int set)
DMZ_META_VER, zmd->sb_version);
return -EINVAL;
}
-   if ((zmd->sb_version < 1) && (set == 2)) {
+   if ((zmd->sb_version < 1) && tertiary) {
dmz_dev_err(dev, "Tertiary superblocks are not supported");
return -EINVAL;
}
@@ -1059,7 +1060,7 @@ static int dmz_check_sb(struct dmz_metadata *zmd, unsigned int set)
return -ENXIO;
}
 
-   if (set == 2) {
+   if (tertiary) {
/*
 * Generation number should be 0, but it doesn't
 * really matter if it isn't.
@@ -1108,13 +1109,13 @@ static int dmz_check_sb(struct dmz_metadata *zmd, unsigned int set)
 /*
  * Read the first or second super block from disk.
  */
-static int dmz_read_sb(struct dmz_metadata *zmd, unsigned int set)
+static int dmz_read_sb(struct dmz_metadata *zmd, struct dmz_sb *sb, int set)
 {
DMDEBUG("(%s): read superblock set %d dev %s block %llu",
zmd->devname, set, zmd->sb[set].dev->name,
zmd->sb[set].block);
-   return dmz_rdwr_block(zmd->sb[set].dev, REQ_OP_READ,
- zmd->sb[set].block, zmd->sb[set].mblk->page);
+   return dmz_rdwr_block(sb->dev, REQ_OP_READ,
+ sb->block, sb->mblk->page);
 }
 
 /*
@@ -1142,7 +1143,7 @@ static int dmz_lookup_secondary_sb(struct dmz_metadata *zmd)
zmd->sb[1].zone = xa_load(&zmd->zones, zone_id + 1);
zmd->sb[1].dev = dmz_zone_to_dev(zmd, zmd->sb[1].zone);
for (i = 1; i < zmd->nr_rnd_zones; i++) {
-   if (dmz_read_sb(zmd, 1) != 0)
+   if (dmz_read_sb(zmd, &zmd->sb[1], 1) != 0)
break;
if (le32_to_cpu(zmd->sb[1].sb->magic) == DMZ_MAGIC)
return 0;
@@ -1160,9 +1161,9 @@ static int dmz_lookup_secondary_sb(struct dmz_metadata *zmd)
 }
 
 /*
- * Read the first or second super block from disk.
+ * Read a super block from disk.
  */
-static int dmz_get_sb(struct dmz_metadata *zmd, unsigned int set)
+static int dmz_get_sb(struct dmz_metadata *zmd, struct dmz_sb *sb, int set)
 {
struct dmz_mblock *mblk;
int ret;
@@ -1172,14 +1173,14 @@ static int dmz_get_sb(struct dmz_metadata *zmd, unsigned int set)
if (!mblk)
return -ENOMEM;
 
-   zmd->sb[set].mblk = mblk;
-   zmd->sb[set].sb = mblk->data;
+   sb->mblk = mblk;
+   sb->sb = mblk->data;
 
/* Read super block */
-   ret = dmz_read_sb(zmd, set);
+   ret = dmz_read_sb(zmd, sb, set);
if (ret) {
dmz_free_mblock(zmd, mblk);
-   zmd->sb[set].mblk = NULL;
+   sb->mblk = NULL;
return ret;
}
 
@@ -1253,13 +1254,13 @@ static int dmz_load_sb(struct dmz_metadata *zmd)
/* Read and check the primary super block */
zmd->sb[0].block = dmz_start_block(zmd, zmd->sb[0].zone);
zmd->sb[0].dev = dmz_zone_to_dev(zmd, zmd->sb[0].zone);
-   ret = dmz_get_sb(zmd, 0);
+   ret = dmz_get_sb(zmd, &zmd->sb[0], 0);
if (ret) {
dmz_dev_err(zmd->sb[0].dev, "Read primary super block failed");
return ret;
}
 
-   ret = dmz_check_sb(zmd, 0);
+   ret = dmz_check_sb(zmd, 

Re: [dm-devel] [PATCH] block: Improve io_opt limit stacking

2020-05-22 Thread Martin K. Petersen


>>> +   if (t->io_opt & (t->physical_block_size - 1))
>>> +   t->io_opt = lcm(t->io_opt, t->physical_block_size);
>
>> Any comment on this patch? Note: the patch "nvme: Fix
>> io_opt limit setting" is already queued for 5.8.
>
> Setting io_opt to the physical block size is not correct.

Oh, missed the lcm(). But I'm still concerned about twiddling io_opt to
a value different than the one reported by an underlying device.

Setting io_opt to something that's less than a full stripe width in a
RAID, for instance, doesn't produce the expected result. So I think I'd
prefer not to set io_opt at all if it isn't consistent across all the
stacked devices.

Let me chew on it for a bit...

-- 
Martin K. Petersen  Oracle Linux Engineering

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel



Re: [dm-devel] [PATCH] block: Improve io_opt limit stacking

2020-05-22 Thread Martin K. Petersen


Damien,

>> +if (t->io_opt & (t->physical_block_size - 1))
>> +t->io_opt = lcm(t->io_opt, t->physical_block_size);

> Any comment on this patch? Note: the patch "nvme: Fix
> io_opt limit setting" is already queued for 5.8.

Setting io_opt to the physical block size is not correct.

-- 
Martin K. Petersen  Oracle Linux Engineering

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel



Re: [dm-devel] [PATCH] dm-zoned: remove leftover hunk for switching to sequential zones

2020-05-22 Thread Damien Le Moal
On 2020/05/22 16:32, Hannes Reinecke wrote:
> Remove a leftover hunk to switch from sequential zones to random

...from random zones to sequential zones...

> zones when selecting a reclaim zone; the logic has moved into the
> caller and this patch is now pointless.

s/this patch/this hunk/ ?

> 
> Fixes: 34f5affd04c4 ("dm zoned: separate random and cache zones")
> Signed-off-by: Hannes Reinecke 
> ---
>  drivers/md/dm-zoned-metadata.c | 8 
>  1 file changed, 8 deletions(-)
> 
> diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
> index db0dc2b5d44d..4a2e351365c5 100644
> --- a/drivers/md/dm-zoned-metadata.c
> +++ b/drivers/md/dm-zoned-metadata.c
> @@ -2111,14 +2111,6 @@ struct dm_zone *dmz_alloc_zone(struct dmz_metadata *zmd, unsigned long flags)
>*/
>   if (!(flags & DMZ_ALLOC_RECLAIM))
>   return NULL;
> - /*
> -  * Use sequential write zones if we started off with random
> -  * zones and the list is empty
> -  */
> - if (list == &zmd->unmap_rnd_list) {
> - list = &zmd->unmap_seq_list;
> - goto again;
> - }
>   /*
>* Fallback to the reserved sequential zones
>*/

Apart from the commit message nits, looks good.

Reviewed-by: Damien Le Moal 

-- 
Damien Le Moal
Western Digital Research



--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel



[dm-devel] [PATCH] dm-zoned: remove leftover hunk for switching to sequential zones

2020-05-22 Thread Hannes Reinecke
Remove a leftover hunk to switch from sequential zones to random
zones when selecting a reclaim zone; the logic has moved into the
caller and this patch is now pointless.

Fixes: 34f5affd04c4 ("dm zoned: separate random and cache zones")
Signed-off-by: Hannes Reinecke 
---
 drivers/md/dm-zoned-metadata.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
index db0dc2b5d44d..4a2e351365c5 100644
--- a/drivers/md/dm-zoned-metadata.c
+++ b/drivers/md/dm-zoned-metadata.c
@@ -2111,14 +2111,6 @@ struct dm_zone *dmz_alloc_zone(struct dmz_metadata *zmd, unsigned long flags)
 */
if (!(flags & DMZ_ALLOC_RECLAIM))
return NULL;
-   /*
-    * Use sequential write zones if we started off with random
-    * zones and the list is empty
-    */
-   if (list == &zmd->unmap_rnd_list) {
-   list = &zmd->unmap_seq_list;
-   goto again;
-   }
/*
 * Fallback to the reserved sequential zones
 */
-- 
2.25.0

--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel



Re: [dm-devel] [PATCH] block: Improve io_opt limit stacking

2020-05-22 Thread Damien Le Moal
On 2020/05/14 15:58, Damien Le Moal wrote:
> When devices with different physical sector sizes are stacked, the
> largest value is used as the stacked device physical sector size. For
> the optimal IO size, the lowest common multiple (lcm) of the underlying
> devices is used for the stacked device. In this scenario, if only one of
> the underlying device reports an optimal IO size, that value is used as
> is for the stacked device but that value may not be a multiple of the
> stacked device physical sector size. In this case, blk_stack_limits()
> returns an error resulting in warnings being printed on device mapper
> startup (observed with dm-zoned dual drive setup combining a 512B
> sector SSD with a 4K sector HDD).
> 
> To fix this, rather than returning an error, the optimal IO size limit
> for the stacked device can be adjusted to the lowest common multiple
> (lcm) of the stacked physical sector size and optimal IO size, resulting
> in a value that is a multiple of the physical sector size while still
> being an optimal size for the underlying devices.
> 
> This patch is complementary to the patch "nvme: Fix io_opt limit
> setting" which prevents the nvme driver from reporting an optimal IO
> size equal to a namespace sector size for a device that does not report
> an optimal IO size.
> 
> Suggested-by: Keith Busch 
> Signed-off-by: Damien Le Moal 
> ---
>  block/blk-settings.c | 7 ++-
>  1 file changed, 2 insertions(+), 5 deletions(-)
> 
> diff --git a/block/blk-settings.c b/block/blk-settings.c
> index 9a2c23cd9700..9a2b017ff681 100644
> --- a/block/blk-settings.c
> +++ b/block/blk-settings.c
> @@ -561,11 +561,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
>   }
>  
>   /* Optimal I/O a multiple of the physical block size? */
> - if (t->io_opt & (t->physical_block_size - 1)) {
> - t->io_opt = 0;
> - t->misaligned = 1;
> - ret = -1;
> - }
> + if (t->io_opt & (t->physical_block_size - 1))
> + t->io_opt = lcm(t->io_opt, t->physical_block_size);
>  
>   t->raid_partial_stripes_expensive =
>   max(t->raid_partial_stripes_expensive,
> 

Jens,

Any comment on this patch?
Note: the patch "nvme: Fix io_opt limit setting" is already queued for
5.8.
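
As a worked example with the dm-zoned setup from the description: if the
512B-sector SSD ends up propagating io_opt = 512 while the 4K HDD makes
the stacked physical block size 4096, then 512 & (4096 - 1) != 0, and
instead of zeroing io_opt and flagging misalignment the patched code
computes (lcm() being the kernel helper from linux/lcm.h):

	t->io_opt = lcm(t->io_opt, t->physical_block_size);
	/* lcm(512, 4096) == 4096, a multiple of both values */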

-- 
Damien Le Moal
Western Digital Research



--
dm-devel mailing list
dm-devel@redhat.com
https://www.redhat.com/mailman/listinfo/dm-devel