On 2020/05/23 0:39, Hannes Reinecke wrote:
> Instead of having one reclaim workqueue for the entire set we should
> be allocating a reclaim workqueue per device; that will reduce
> contention and should boost performance for a multi-device setup.
> 
> Signed-off-by: Hannes Reinecke <[email protected]>
> ---
>  drivers/md/dm-zoned-reclaim.c | 70 
> +++++++++++++++++++++----------------------
>  drivers/md/dm-zoned-target.c  | 36 +++++++++++++---------
>  drivers/md/dm-zoned.h         | 38 ++++++++++++-----------
>  3 files changed, 76 insertions(+), 68 deletions(-)
> 
> diff --git a/drivers/md/dm-zoned-reclaim.c b/drivers/md/dm-zoned-reclaim.c
> index f2e053b5f2db..6f3d8f18b989 100644
> --- a/drivers/md/dm-zoned-reclaim.c
> +++ b/drivers/md/dm-zoned-reclaim.c
> @@ -21,6 +21,8 @@ struct dmz_reclaim {
>       struct dm_kcopyd_throttle kc_throttle;
>       int                     kc_err;
>  
> +     int                     dev_idx;
> +
>       unsigned long           flags;
>  
>       /* Last target access time */
> @@ -197,8 +199,8 @@ static int dmz_reclaim_buf(struct dmz_reclaim *zrc, 
> struct dm_zone *dzone)
>       struct dmz_metadata *zmd = zrc->metadata;
>       int ret;
>  
> -     DMDEBUG("(%s): Chunk %u, move buf zone %u (weight %u) to data zone %u 
> (weight %u)",
> -             dmz_metadata_label(zmd),
> +     DMDEBUG("(%s/%u): Chunk %u, move buf zone %u (weight %u) to data zone 
> %u (weight %u)",
> +             dmz_metadata_label(zmd), zrc->dev_idx,
>               dzone->chunk, bzone->id, dmz_weight(bzone),
>               dzone->id, dmz_weight(dzone));
>  
> @@ -236,8 +238,8 @@ static int dmz_reclaim_seq_data(struct dmz_reclaim *zrc, 
> struct dm_zone *dzone)
>       struct dmz_metadata *zmd = zrc->metadata;
>       int ret = 0;
>  
> -     DMDEBUG("(%s): Chunk %u, move data zone %u (weight %u) to buf zone %u 
> (weight %u)",
> -             dmz_metadata_label(zmd),
> +     DMDEBUG("(%s/%u): Chunk %u, move data zone %u (weight %u) to buf zone 
> %u (weight %u)",
> +             dmz_metadata_label(zmd), zrc->dev_idx,
>               chunk, dzone->id, dmz_weight(dzone),
>               bzone->id, dmz_weight(bzone));
>  
> @@ -294,8 +296,8 @@ static int dmz_reclaim_rnd_data(struct dmz_reclaim *zrc, 
> struct dm_zone *dzone)
>       if (!szone)
>               return -ENOSPC;
>  
> -     DMDEBUG("(%s): Chunk %u, move %s zone %u (weight %u) to %s zone %u",
> -             dmz_metadata_label(zmd), chunk,
> +     DMDEBUG("(%s/%u): Chunk %u, move %s zone %u (weight %u) to %s zone %u",
> +             dmz_metadata_label(zmd), zrc->dev_idx, chunk,
>               dmz_is_cache(dzone) ? "cache" : "rnd",
>               dzone->id, dmz_weight(dzone),
>               dmz_is_rnd(szone) ? "rnd" : "seq", szone->id);
> @@ -368,8 +370,8 @@ static int dmz_do_reclaim(struct dmz_reclaim *zrc)
>       /* Get a data zone */
>       dzone = dmz_get_zone_for_reclaim(zmd, dmz_target_idle(zrc));
>       if (!dzone) {
> -             DMDEBUG("(%s): No zone found to reclaim",
> -                     dmz_metadata_label(zmd));
> +             DMDEBUG("(%s/%u): No zone found to reclaim",
> +                     dmz_metadata_label(zmd), zrc->dev_idx);
>               return -EBUSY;
>       }
>  
> @@ -416,24 +418,26 @@ static int dmz_do_reclaim(struct dmz_reclaim *zrc)
>  out:
>       if (ret) {
>               if (ret == -EINTR)
> -                     DMDEBUG("(%s): reclaim zone %u interrupted",
> -                             dmz_metadata_label(zmd), rzone->id);
> +                     DMDEBUG("(%s/%u): reclaim zone %u interrupted",
> +                             dmz_metadata_label(zmd), zrc->dev_idx,
> +                             rzone->id);
>               else
> -                     DMDEBUG("(%s): Failed to reclaim zone %u, err %d",
> -                             dmz_metadata_label(zmd), rzone->id, ret);
> +                     DMDEBUG("(%s/%u): Failed to reclaim zone %u, err %d",
> +                             dmz_metadata_label(zmd), zrc->dev_idx,
> +                             rzone->id, ret);
>               dmz_unlock_zone_reclaim(dzone);
>               return ret;
>       }
>  
>       ret = dmz_flush_metadata(zrc->metadata);
>       if (ret) {
> -             DMDEBUG("(%s): Metadata flush for zone %u failed, err %d",
> -                     dmz_metadata_label(zmd), rzone->id, ret);
> +             DMDEBUG("(%s/%u): Metadata flush for zone %u failed, err %d",
> +                     dmz_metadata_label(zmd), zrc->dev_idx, rzone->id, ret);
>               return ret;
>       }
>  
> -     DMDEBUG("(%s): Reclaimed zone %u in %u ms",
> -             dmz_metadata_label(zmd),
> +     DMDEBUG("(%s/%u): Reclaimed zone %u in %u ms",
> +             dmz_metadata_label(zmd), zrc->dev_idx,
>               rzone->id, jiffies_to_msecs(jiffies - start));
>       return 0;
>  }
> @@ -448,12 +452,8 @@ static unsigned int dmz_reclaim_percentage(struct 
> dmz_reclaim *zrc)
>               nr_zones = nr_cache;
>               nr_unmap = dmz_nr_unmap_cache_zones(zmd);
>       } else {
> -             int i;
> -
> -             for (i = 0; i < dmz_nr_devs(zmd); i++) {
> -                     nr_zones += dmz_nr_rnd_zones(zmd, i);
> -                     nr_unmap += dmz_nr_unmap_rnd_zones(zmd, i);
> -             }
> +             nr_zones = dmz_nr_rnd_zones(zmd, zrc->dev_idx);
> +             nr_unmap = dmz_nr_unmap_rnd_zones(zmd, zrc->dev_idx);
>       }
>       return nr_unmap * 100 / nr_zones;
>  }
> @@ -463,11 +463,9 @@ static unsigned int dmz_reclaim_percentage(struct 
> dmz_reclaim *zrc)
>   */
>  static bool dmz_should_reclaim(struct dmz_reclaim *zrc, unsigned int p_unmap)
>  {
> -     int i;
> -     unsigned int nr_reclaim = 0;
> +     unsigned int nr_reclaim;
>  
> -     for (i = 0; i < dmz_nr_devs(zrc->metadata); i++)
> -             nr_reclaim += dmz_nr_rnd_zones(zrc->metadata, i);
> +     nr_reclaim = dmz_nr_rnd_zones(zrc->metadata, zrc->dev_idx);
>  
>       if (dmz_nr_cache_zones(zrc->metadata))
>               nr_reclaim += dmz_nr_cache_zones(zrc->metadata);
> @@ -495,7 +493,7 @@ static void dmz_reclaim_work(struct work_struct *work)
>       struct dmz_reclaim *zrc = container_of(work, struct dmz_reclaim, 
> work.work);
>       struct dmz_metadata *zmd = zrc->metadata;
>       unsigned int p_unmap, nr_unmap_rnd = 0, nr_rnd = 0;
> -     int ret, i;
> +     int ret;
>  
>       if (dmz_dev_is_dying(zmd))
>               return;
> @@ -520,12 +518,11 @@ static void dmz_reclaim_work(struct work_struct *work)
>               zrc->kc_throttle.throttle = min(75U, 100U - p_unmap / 2);
>       }
>  
> -     for (i = 0; i < dmz_nr_devs(zmd); i++) {
> -             nr_unmap_rnd += dmz_nr_unmap_rnd_zones(zmd, i);
> -             nr_rnd += dmz_nr_rnd_zones(zmd, i);
> -     }
> -     DMDEBUG("(%s): Reclaim (%u): %s, %u%% free zones (%u/%u cache %u/%u 
> random)",
> -             dmz_metadata_label(zmd),
> +     nr_unmap_rnd = dmz_nr_unmap_rnd_zones(zmd, zrc->dev_idx);
> +     nr_rnd = dmz_nr_rnd_zones(zmd, zrc->dev_idx);
> +
> +     DMDEBUG("(%s/%u): Reclaim (%u): %s, %u%% free zones (%u/%u cache %u/%u 
> random)",
> +             dmz_metadata_label(zmd), zrc->dev_idx,
>               zrc->kc_throttle.throttle,
>               (dmz_target_idle(zrc) ? "Idle" : "Busy"),
>               p_unmap, dmz_nr_unmap_cache_zones(zmd),
> @@ -545,7 +542,7 @@ static void dmz_reclaim_work(struct work_struct *work)
>   * Initialize reclaim.
>   */
>  int dmz_ctr_reclaim(struct dmz_metadata *zmd,
> -                 struct dmz_reclaim **reclaim)
> +                 struct dmz_reclaim **reclaim, int idx)
>  {
>       struct dmz_reclaim *zrc;
>       int ret;
> @@ -556,6 +553,7 @@ int dmz_ctr_reclaim(struct dmz_metadata *zmd,
>  
>       zrc->metadata = zmd;
>       zrc->atime = jiffies;
> +     zrc->dev_idx = idx;
>  
>       /* Reclaim kcopyd client */
>       zrc->kc = dm_kcopyd_client_create(&zrc->kc_throttle);
> @@ -567,8 +565,8 @@ int dmz_ctr_reclaim(struct dmz_metadata *zmd,
>  
>       /* Reclaim work */
>       INIT_DELAYED_WORK(&zrc->work, dmz_reclaim_work);
> -     zrc->wq = alloc_ordered_workqueue("dmz_rwq_%s", WQ_MEM_RECLAIM,
> -                                       dmz_metadata_label(zmd));
> +     zrc->wq = alloc_ordered_workqueue("dmz_rwq_%s_%d", WQ_MEM_RECLAIM,
> +                                       dmz_metadata_label(zmd), idx);
>       if (!zrc->wq) {
>               ret = -ENOMEM;
>               goto err;
> diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
> index f34fcc3f7cc6..a33c26a6ab31 100644
> --- a/drivers/md/dm-zoned-target.c
> +++ b/drivers/md/dm-zoned-target.c
> @@ -49,9 +49,6 @@ struct dmz_target {
>       /* For metadata handling */
>       struct dmz_metadata     *metadata;
>  
> -     /* For reclaim */
> -     struct dmz_reclaim      *reclaim;
> -
>       /* For chunk work */
>       struct radix_tree_root  chunk_rxtree;
>       struct workqueue_struct *chunk_wq;
> @@ -402,14 +399,15 @@ static void dmz_handle_bio(struct dmz_target *dmz, 
> struct dm_chunk_work *cw,
>               dm_per_bio_data(bio, sizeof(struct dmz_bioctx));
>       struct dmz_metadata *zmd = dmz->metadata;
>       struct dm_zone *zone;
> -     int ret;
> +     int i, ret;
>  
>       /*
>        * Write may trigger a zone allocation. So make sure the
>        * allocation can succeed.
>        */
>       if (bio_op(bio) == REQ_OP_WRITE)
> -             dmz_schedule_reclaim(dmz->reclaim);
> +             for (i = 0; i < dmz->nr_ddevs; i++)
> +                     dmz_schedule_reclaim(dmz->dev[i].reclaim);
>  
>       dmz_lock_metadata(zmd);
>  
> @@ -575,7 +573,6 @@ static int dmz_queue_chunk_work(struct dmz_target *dmz, 
> struct bio *bio)
>  
>       bio_list_add(&cw->bio_list, bio);
>  
> -     dmz_reclaim_bio_acc(dmz->reclaim);
>       if (queue_work(dmz->chunk_wq, &cw->work))
>               dmz_get_chunk_work(cw);
>  out:
> @@ -935,10 +932,12 @@ static int dmz_ctr(struct dm_target *ti, unsigned int 
> argc, char **argv)
>       mod_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD);
>  
>       /* Initialize reclaim */
> -     ret = dmz_ctr_reclaim(dmz->metadata, &dmz->reclaim);
> -     if (ret) {
> -             ti->error = "Zone reclaim initialization failed";
> -             goto err_fwq;
> +     for (i = 0; i < argc; i++) {
> +             ret = dmz_ctr_reclaim(dmz->metadata, &dmz->dev[i].reclaim, i);
> +             if (ret) {
> +                     ti->error = "Zone reclaim initialization failed";
> +                     goto err_fwq;
> +             }
>       }
>  
>       DMINFO("(%s): Target device: %llu 512-byte logical sectors (%llu 
> blocks)",
> @@ -971,11 +970,13 @@ static int dmz_ctr(struct dm_target *ti, unsigned int 
> argc, char **argv)
>  static void dmz_dtr(struct dm_target *ti)
>  {
>       struct dmz_target *dmz = ti->private;
> +     int i;
>  
>       flush_workqueue(dmz->chunk_wq);
>       destroy_workqueue(dmz->chunk_wq);
>  
> -     dmz_dtr_reclaim(dmz->reclaim);
> +     for (i = 0; i < dmz_nr_devs(dmz->metadata); i++)
> +             dmz_dtr_reclaim(dmz->dev[i].reclaim);
>  
>       cancel_delayed_work_sync(&dmz->flush_work);
>       destroy_workqueue(dmz->flush_wq);
> @@ -1044,9 +1045,11 @@ static int dmz_prepare_ioctl(struct dm_target *ti, 
> struct block_device **bdev)
>  static void dmz_suspend(struct dm_target *ti)
>  {
>       struct dmz_target *dmz = ti->private;
> +     int i;
>  
>       flush_workqueue(dmz->chunk_wq);
> -     dmz_suspend_reclaim(dmz->reclaim);
> +     for (i = 0; i < dmz->nr_ddevs; i++)
> +             dmz_suspend_reclaim(dmz->dev[i].reclaim);
>       cancel_delayed_work_sync(&dmz->flush_work);
>  }
>  
> @@ -1056,9 +1059,11 @@ static void dmz_suspend(struct dm_target *ti)
>  static void dmz_resume(struct dm_target *ti)
>  {
>       struct dmz_target *dmz = ti->private;
> +     int i;
>  
>       queue_delayed_work(dmz->flush_wq, &dmz->flush_work, DMZ_FLUSH_PERIOD);
> -     dmz_resume_reclaim(dmz->reclaim);
> +     for (i = 0; i < dmz->nr_ddevs; i++)
> +             dmz_resume_reclaim(dmz->dev[i].reclaim);
>  }
>  
>  static int dmz_iterate_devices(struct dm_target *ti,
> @@ -1130,7 +1135,10 @@ static int dmz_message(struct dm_target *ti, unsigned 
> int argc, char **argv,
>       int r = -EINVAL;
>  
>       if (!strcasecmp(argv[0], "reclaim")) {
> -             dmz_schedule_reclaim(dmz->reclaim);
> +             int i;
> +
> +             for (i = 0; i < dmz->nr_ddevs; i++)
> +                     dmz_schedule_reclaim(dmz->dev[i].reclaim);
>               r = 0;
>       } else
>               DMERR("unrecognized message %s", argv[0]);
> diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h
> index 0052eee12299..1ee91a3a4076 100644
> --- a/drivers/md/dm-zoned.h
> +++ b/drivers/md/dm-zoned.h
> @@ -54,6 +54,7 @@ struct dmz_reclaim;
>  struct dmz_dev {
>       struct block_device     *bdev;
>       struct dmz_metadata     *metadata;
> +     struct dmz_reclaim      *reclaim;
>  
>       char                    name[BDEVNAME_SIZE];
>       uuid_t                  uuid;
> @@ -240,23 +241,6 @@ static inline void dmz_activate_zone(struct dm_zone 
> *zone)
>       atomic_inc(&zone->refcount);
>  }
>  
> -/*
> - * Deactivate a zone. This decrement the zone reference counter
> - * indicating that all BIOs to the zone have completed when the count is 0.
> - */
> -static inline void dmz_deactivate_zone(struct dm_zone *zone)
> -{
> -     atomic_dec(&zone->refcount);
> -}
> -
> -/*
> - * Test if a zone is active, that is, has a refcount > 0.
> - */
> -static inline bool dmz_is_active(struct dm_zone *zone)
> -{
> -     return atomic_read(&zone->refcount);
> -}
> -
>  int dmz_lock_zone_reclaim(struct dm_zone *zone);
>  void dmz_unlock_zone_reclaim(struct dm_zone *zone);
>  struct dm_zone *dmz_get_zone_for_reclaim(struct dmz_metadata *zmd, bool 
> idle);
> @@ -283,7 +267,7 @@ int dmz_merge_valid_blocks(struct dmz_metadata *zmd, 
> struct dm_zone *from_zone,
>  /*
>   * Functions defined in dm-zoned-reclaim.c
>   */
> -int dmz_ctr_reclaim(struct dmz_metadata *zmd, struct dmz_reclaim **zrc);
> +int dmz_ctr_reclaim(struct dmz_metadata *zmd, struct dmz_reclaim **zrc, int 
> idx);
>  void dmz_dtr_reclaim(struct dmz_reclaim *zrc);
>  void dmz_suspend_reclaim(struct dmz_reclaim *zrc);
>  void dmz_resume_reclaim(struct dmz_reclaim *zrc);
> @@ -296,4 +280,22 @@ void dmz_schedule_reclaim(struct dmz_reclaim *zrc);
>  bool dmz_bdev_is_dying(struct dmz_dev *dmz_dev);
>  bool dmz_check_bdev(struct dmz_dev *dmz_dev);
>  
> +/*
> + * Deactivate a zone. This decrement the zone reference counter
> + * indicating that all BIOs to the zone have completed when the count is 0.
> + */
> +static inline void dmz_deactivate_zone(struct dm_zone *zone)
> +{
> +     dmz_reclaim_bio_acc(zone->dev->reclaim);
> +     atomic_dec(&zone->refcount);
> +}
> +
> +/*
> + * Test if a zone is active, that is, has a refcount > 0.
> + */
> +static inline bool dmz_is_active(struct dm_zone *zone)
> +{
> +     return atomic_read(&zone->refcount);
> +}
> +
>  #endif /* DM_ZONED_H */
> 

Looks good.

Reviewed-by: Damien Le Moal <[email protected]>

-- 
Damien Le Moal
Western Digital Research



--
dm-devel mailing list
[email protected]
https://www.redhat.com/mailman/listinfo/dm-devel

Reply via email to