On 2020/05/25 16:52, Hannes Reinecke wrote:
> On 5/25/20 4:36 AM, Damien Le Moal wrote:
>> On 2020/05/23 0:39, Hannes Reinecke wrote:
>>> Remove the hard-coded limit of two devices and support an unlimited
>>> number of additional zoned devices.
>>> With that we need to increase the device-mapper version number to
>>> 3.0.0 as we've modified the interface.
>>>
>>> Signed-off-by: Hannes Reinecke <[email protected]>
>>> ---
>>>   drivers/md/dm-zoned-metadata.c |  68 +++++++++++-----------
>>>   drivers/md/dm-zoned-reclaim.c  |  28 ++++++---
>>>   drivers/md/dm-zoned-target.c   | 129 
>>> +++++++++++++++++++++++++----------------
>>>   drivers/md/dm-zoned.h          |   9 +--
>>>   4 files changed, 139 insertions(+), 95 deletions(-)
>>>
>>> diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c
>>> index 5f44970a6187..87784e7785bc 100644
>>> --- a/drivers/md/dm-zoned-metadata.c
>>> +++ b/drivers/md/dm-zoned-metadata.c
>>> @@ -260,6 +260,11 @@ unsigned int dmz_zone_nr_sectors_shift(struct 
>>> dmz_metadata *zmd)
>>>     return zmd->zone_nr_sectors_shift;
>>>   }
>>>   
>>> +unsigned int dmz_nr_devs(struct dmz_metadata *zmd)
>>> +{
>>> +   return zmd->nr_devs;
>>> +}
>>
>> Is this helper really needed ?
>>
> 
> Yes, in dm-zoned-reclaim.c

I meant to say: whoever needs to know the number of devices can just use
"zmd->nr_devs". No need for a helper for that was my point.


> 
>>> +
>>>   unsigned int dmz_nr_zones(struct dmz_metadata *zmd)
>>>   {
>>>     return zmd->nr_zones;
>>> @@ -270,24 +275,14 @@ unsigned int dmz_nr_chunks(struct dmz_metadata *zmd)
>>>     return zmd->nr_chunks;
>>>   }
>>>   
>>> -unsigned int dmz_nr_rnd_zones(struct dmz_metadata *zmd)
>>> +unsigned int dmz_nr_rnd_zones(struct dmz_metadata *zmd, int idx)
>>>   {
>>> -   unsigned int nr_rnd_zones = 0;
>>> -   int i;
>>> -
>>> -   for (i = 0; i < zmd->nr_devs; i++)
>>> -           nr_rnd_zones += zmd->dev[i].nr_rnd;
>>> -   return nr_rnd_zones;
>>> +   return zmd->dev[idx].nr_rnd;
>>
>> AH. OK. So my comment on patch 8 is voided :)
>>
> Yeah, the patch arrangement could be improved; I'll see to roll both 
> changes into one patch.
> 
>>>   }
>>>   
>>> -unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd)
>>> +unsigned int dmz_nr_unmap_rnd_zones(struct dmz_metadata *zmd, int idx)
>>>   {
>>> -   unsigned int nr_unmap_rnd_zones = 0;
>>> -   int i;
>>> -
>>> -   for (i = 0; i < zmd->nr_devs; i++)
>>> -           nr_unmap_rnd_zones += atomic_read(&zmd->dev[i].unmap_nr_rnd);
>>> -   return nr_unmap_rnd_zones;
>>> +   return atomic_read(&zmd->dev[idx].unmap_nr_rnd);
>>>   }
>>>   
>>>   unsigned int dmz_nr_cache_zones(struct dmz_metadata *zmd)
>>> @@ -300,24 +295,14 @@ unsigned int dmz_nr_unmap_cache_zones(struct 
>>> dmz_metadata *zmd)
>>>     return atomic_read(&zmd->unmap_nr_cache);
>>>   }
>>>   
>>> -unsigned int dmz_nr_seq_zones(struct dmz_metadata *zmd)
>>> +unsigned int dmz_nr_seq_zones(struct dmz_metadata *zmd, int idx)
>>>   {
>>> -   unsigned int nr_seq_zones = 0;
>>> -   int i;
>>> -
>>> -   for (i = 0; i < zmd->nr_devs; i++)
>>> -           nr_seq_zones += zmd->dev[i].nr_seq;
>>> -   return nr_seq_zones;
>>> +   return zmd->dev[idx].nr_seq;
>>>   }
>>>   
>>> -unsigned int dmz_nr_unmap_seq_zones(struct dmz_metadata *zmd)
>>> +unsigned int dmz_nr_unmap_seq_zones(struct dmz_metadata *zmd, int idx)
>>>   {
>>> -   unsigned int nr_unmap_seq_zones = 0;
>>> -   int i;
>>> -
>>> -   for (i = 0; i < zmd->nr_devs; i++)
>>> -           nr_unmap_seq_zones += atomic_read(&zmd->dev[i].unmap_nr_seq);
>>> -   return nr_unmap_seq_zones;
>>> +   return atomic_read(&zmd->dev[idx].unmap_nr_seq);
>>>   }
>>>   
>>>   static struct dm_zone *dmz_get(struct dmz_metadata *zmd, unsigned int 
>>> zone_id)
>>> @@ -1530,7 +1515,20 @@ static int dmz_init_zones(struct dmz_metadata *zmd)
>>>              */
>>>             zmd->sb[0].zone = dmz_get(zmd, 0);
>>>   
>>> -           zoned_dev = &zmd->dev[1];
>>> +           for (i = 1; i < zmd->nr_devs; i++) {
>>> +                   zoned_dev = &zmd->dev[i];
>>> +
>>> +                   ret = blkdev_report_zones(zoned_dev->bdev, 0,
>>> +                                             BLK_ALL_ZONES,
>>> +                                             dmz_init_zone, zoned_dev);
>>> +                   if (ret < 0) {
>>> +                           DMDEBUG("(%s): Failed to report zones, error 
>>> %d",
>>> +                                   zmd->devname, ret);
>>> +                           dmz_drop_zones(zmd);
>>> +                           return ret;
>>> +                   }
>>> +           }
>>> +           return 0;
>>>     }
>>>   
>>>     /*
>>> @@ -2921,10 +2919,14 @@ int dmz_ctr_metadata(struct dmz_dev *dev, int 
>>> num_dev,
>>>                   zmd->nr_data_zones, zmd->nr_chunks);
>>>     dmz_zmd_debug(zmd, "    %u cache zones (%u unmapped)",
>>>                   zmd->nr_cache, atomic_read(&zmd->unmap_nr_cache));
>>> -   dmz_zmd_debug(zmd, "    %u random zones (%u unmapped)",
>>> -                 dmz_nr_rnd_zones(zmd), dmz_nr_unmap_rnd_zones(zmd));
>>> -   dmz_zmd_debug(zmd, "    %u sequential zones (%u unmapped)",
>>> -                 dmz_nr_seq_zones(zmd), dmz_nr_unmap_seq_zones(zmd));
>>> +   for (i = 0; i < zmd->nr_devs; i++) {
>>> +           dmz_zmd_debug(zmd, "    %u random zones (%u unmapped)",
>>> +                         dmz_nr_rnd_zones(zmd, i),
>>> +                         dmz_nr_unmap_rnd_zones(zmd, i));
>>> +           dmz_zmd_debug(zmd, "    %u sequential zones (%u unmapped)",
>>> +                         dmz_nr_seq_zones(zmd, i),
>>> +                         dmz_nr_unmap_seq_zones(zmd, i));
>>> +   }
>>>     dmz_zmd_debug(zmd, "  %u reserved sequential data zones",
>>>                   zmd->nr_reserved_seq);
>>>     dmz_zmd_debug(zmd, "Format:");
>>> diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c
>>> index fba0d48e38a7..f2e053b5f2db 100644
>>> --- a/drivers/md/dm-zoned-reclaim.c
>>> +++ b/drivers/md/dm-zoned-reclaim.c
>>> @@ -442,15 +442,18 @@ static unsigned int dmz_reclaim_percentage(struct 
>>> dmz_reclaim *zrc)
>>>   {
>>>     struct dmz_metadata *zmd = zrc->metadata;
>>>     unsigned int nr_cache = dmz_nr_cache_zones(zmd);
>>> -   unsigned int nr_rnd = dmz_nr_rnd_zones(zmd);
>>> -   unsigned int nr_unmap, nr_zones;
>>> +   unsigned int nr_unmap = 0, nr_zones = 0;
>>>   
>>>     if (nr_cache) {
>>>             nr_zones = nr_cache;
>>>             nr_unmap = dmz_nr_unmap_cache_zones(zmd);
>>>     } else {
>>> -           nr_zones = nr_rnd;
>>> -           nr_unmap = dmz_nr_unmap_rnd_zones(zmd);
>>> +           int i;
>>> +
>>> +           for (i = 0; i < dmz_nr_devs(zmd); i++) {
>>> +                   nr_zones += dmz_nr_rnd_zones(zmd, i);
>>
>> May be not... We could keep constant totals in zmd to avoid this.
>>
>>> +                   nr_unmap += dmz_nr_unmap_rnd_zones(zmd, i);
>>> +           }
>>>     }
>>>     return nr_unmap * 100 / nr_zones;
>>>   }
>>> @@ -460,7 +463,11 @@ static unsigned int dmz_reclaim_percentage(struct 
>>> dmz_reclaim *zrc)
>>>    */
>>>   static bool dmz_should_reclaim(struct dmz_reclaim *zrc, unsigned int 
>>> p_unmap)
>>>   {
>>> -   unsigned int nr_reclaim = dmz_nr_rnd_zones(zrc->metadata);
>>> +   int i;
>>> +   unsigned int nr_reclaim = 0;
>>> +
>>> +   for (i = 0; i < dmz_nr_devs(zrc->metadata); i++)
>>> +           nr_reclaim += dmz_nr_rnd_zones(zrc->metadata, i);
>>>   
>>>     if (dmz_nr_cache_zones(zrc->metadata))
>>>             nr_reclaim += dmz_nr_cache_zones(zrc->metadata);
>>> @@ -487,8 +494,8 @@ static void dmz_reclaim_work(struct work_struct *work)
>>>   {
>>>     struct dmz_reclaim *zrc = container_of(work, struct dmz_reclaim, 
>>> work.work);
>>>     struct dmz_metadata *zmd = zrc->metadata;
>>> -   unsigned int p_unmap;
>>> -   int ret;
>>> +   unsigned int p_unmap, nr_unmap_rnd = 0, nr_rnd = 0;
>>> +   int ret, i;
>>>   
>>>     if (dmz_dev_is_dying(zmd))
>>>             return;
>>> @@ -513,14 +520,17 @@ static void dmz_reclaim_work(struct work_struct *work)
>>>             zrc->kc_throttle.throttle = min(75U, 100U - p_unmap / 2);
>>>     }
>>>   
>>> +   for (i = 0; i < dmz_nr_devs(zmd); i++) {
>>> +           nr_unmap_rnd += dmz_nr_unmap_rnd_zones(zmd, i);
>>> +           nr_rnd += dmz_nr_rnd_zones(zmd, i);
>>> +   }
>>>     DMDEBUG("(%s): Reclaim (%u): %s, %u%% free zones (%u/%u cache %u/%u 
>>> random)",
>>>             dmz_metadata_label(zmd),
>>>             zrc->kc_throttle.throttle,
>>>             (dmz_target_idle(zrc) ? "Idle" : "Busy"),
>>>             p_unmap, dmz_nr_unmap_cache_zones(zmd),
>>>             dmz_nr_cache_zones(zmd),
>>> -           dmz_nr_unmap_rnd_zones(zmd),
>>> -           dmz_nr_rnd_zones(zmd));
>>> +           nr_unmap_rnd, nr_rnd);
>>>   
>>>     ret = dmz_do_reclaim(zrc);
>>>     if (ret && ret != -EINTR) {
> 
> In the light of this I guess there is a benefit from having the counters
> in the metadata; that indeed would save us to having to export the 
> number of devices.
> I'll give it a go with the next round.
> 
>>> diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
>>> index bca9a611b8dd..f34fcc3f7cc6 100644
>>> --- a/drivers/md/dm-zoned-target.c
>>> +++ b/drivers/md/dm-zoned-target.c
>>> @@ -13,8 +13,6 @@
>>>   
>>>   #define DMZ_MIN_BIOS              8192
>>>   
>>> -#define DMZ_MAX_DEVS               2
>>> -
>>>   /*
>>>    * Zone BIO context.
>>>    */
>>> @@ -40,9 +38,10 @@ struct dm_chunk_work {
>>>    * Target descriptor.
>>>    */
>>>   struct dmz_target {
>>> -   struct dm_dev           *ddev[DMZ_MAX_DEVS];
>>> +   struct dm_dev           **ddev;
>>> +   unsigned int            nr_ddevs;
>>>   
>>> -   unsigned long           flags;
>>> +   unsigned int            flags;
>>>   
>>>     /* Zoned block device information */
>>>     struct dmz_dev          *dev;
>>> @@ -764,7 +763,7 @@ static void dmz_put_zoned_device(struct dm_target *ti)
>>>     struct dmz_target *dmz = ti->private;
>>>     int i;
>>>   
>>> -   for (i = 0; i < DMZ_MAX_DEVS; i++) {
>>> +   for (i = 0; i < dmz->nr_ddevs; i++) {
>>>             if (dmz->ddev[i]) {
>>>                     dm_put_device(ti, dmz->ddev[i]);
>>>                     dmz->ddev[i] = NULL;
>>> @@ -777,21 +776,35 @@ static int dmz_fixup_devices(struct dm_target *ti)
>>>     struct dmz_target *dmz = ti->private;
>>>     struct dmz_dev *reg_dev, *zoned_dev;
>>>     struct request_queue *q;
>>> +   sector_t zone_nr_sectors = 0;
>>> +   int i;
>>>   
>>>     /*
>>> -    * When we have two devices, the first one must be a regular block
>>> -    * device and the second a zoned block device.
>>> +    * When we have more than on devices, the first one must be a
>>> +    * regular block device and the others zoned block devices.
>>>      */
>>> -   if (dmz->ddev[0] && dmz->ddev[1]) {
>>> +   if (dmz->nr_ddevs > 1) {
>>>             reg_dev = &dmz->dev[0];
>>>             if (!(reg_dev->flags & DMZ_BDEV_REGULAR)) {
>>>                     ti->error = "Primary disk is not a regular device";
>>>                     return -EINVAL;
>>>             }
>>> -           zoned_dev = &dmz->dev[1];
>>> -           if (zoned_dev->flags & DMZ_BDEV_REGULAR) {
>>> -                   ti->error = "Secondary disk is not a zoned device";
>>> -                   return -EINVAL;
>>> +           for (i = 1; i < dmz->nr_ddevs; i++) {
>>> +                   zoned_dev = &dmz->dev[i];
>>> +                   if (zoned_dev->flags & DMZ_BDEV_REGULAR) {
>>> +                           ti->error = "Secondary disk is not a zoned 
>>> device";
>>> +                           return -EINVAL;
>>> +                   }
>>> +                   q = bdev_get_queue(zoned_dev->bdev);
>>
>> May be add a comment here that we must check that all zoned devices have the
>> same zone size ?
>>
> 
> I thought it was self-explanatory; but maybe not.
> Will be adding it.

It is indeed not too hard to figure out. But a plain english sentence is nice 
too :)

> 
> Cheers,
> 
> Hannes
> 


-- 
Damien Le Moal
Western Digital Research



--
dm-devel mailing list
[email protected]
https://www.redhat.com/mailman/listinfo/dm-devel

Reply via email to