On 4/3/24 01:42, Damien Le Moal wrote:
diff --git a/block/bio.c b/block/bio.c
index d24420ed1c4c..127c06443bca 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1576,6 +1576,12 @@ void bio_endio(struct bio *bio)
        if (!bio_integrity_endio(bio))
                return;
+ /*
+        * For write BIOs to zoned devices, signal the completion of the BIO so
+        * that the next write BIO can be submitted by zone write plugging.
+        */
+       blk_zone_write_bio_endio(bio);
+
        rq_qos_done_bio(bio);

The function name "blk_zone_write_bio_endio()" is misleading since that
function is called for all bio operation types and not only for zoned
write bios. How about renaming blk_zone_write_bio_endio() into blk_zone_bio_endio()? If that function is renamed the above comment is
no longer necessary in bio_endio() and can be moved to above the
blk_zone_bio_endio() definition.

@@ -1003,6 +1007,13 @@ static enum bio_merge_status 
bio_attempt_front_merge(struct request *req,
+       /*
+        * A front merge for zone writes can happen only if the user submitted
+        * writes out of order. Do not attempt this to let the write fail.
+        */

"zone writes" -> "zoned writes"?

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 88b541e8873f..2c6a317bef7c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -828,6 +828,8 @@ static void blk_complete_request(struct request *req)
                bio = next;
        } while (bio);
+ blk_zone_write_complete_request(req);

Same comment here as above: the function name blk_zone_write_complete_request() is misleading since that function is
called for all request types and not only for zoned writes. Please
rename blk_zone_write_complete_request() into
blk_zone_complete_request().

+       /*
+        * If the plug has a cached request for this queue, try use it.
+        */

try use it -> try to use it (I know this comes from upstream code).

+       if (blk_queue_is_zoned(q) && blk_zone_write_plug_bio(bio, nr_segs))
+               goto queue_exit;

The order of words in the blk_zone_write_plug_bio() function name seems
unusual to me. How about renaming that function into
blk_zone_plug_write_bio()?

+/*
+ * Zone write plug flags bits:

Zone write -> zoned write

+ *  - BLK_ZONE_WPLUG_PLUGGED: Indicate that the zone write plug is plugged,

Indicate -> Indicates

+static bool disk_insert_zone_wplug(struct gendisk *disk,
+                                  struct blk_zone_wplug *zwplug)
+{
+       struct blk_zone_wplug *zwplg;
+       unsigned long flags;
+       unsigned int idx =
+               hash_32(zwplug->zone_no, disk->zone_wplugs_hash_bits);
+
+       /*
+        * Add the new zone write plug to the hash table, but carefully as we
+        * are racing with other submission context, so we may already have a
+        * zone write plug for the same zone.
+        */
+       spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
+       hlist_for_each_entry_rcu(zwplg, &disk->zone_wplugs_hash[idx], node) {
+               if (zwplg->zone_no == zwplug->zone_no) {
+                       spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
+                       return false;
+               }
+       }
+       hlist_add_head_rcu(&zwplug->node, &disk->zone_wplugs_hash[idx]);
+       spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
+
+       return true;
+}

The above code can be made easier to read and more compact by using
guard(spinlock_irqsave)(...) instead of spin_lock_irqsave() + spin_unlock_irqrestore().

+static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk,
+                                                 sector_t sector)
+{
+       unsigned int zno = disk_zone_no(disk, sector);
+       unsigned int idx = hash_32(zno, disk->zone_wplugs_hash_bits);
+       struct blk_zone_wplug *zwplug;
+
+       rcu_read_lock();
+
+       hlist_for_each_entry_rcu(zwplug, &disk->zone_wplugs_hash[idx], node) {
+               if (zwplug->zone_no == zno &&
+                   atomic_inc_not_zero(&zwplug->ref)) {
+                       rcu_read_unlock();
+                       return zwplug;
+               }
+       }
+
+       rcu_read_unlock();
+
+       return NULL;
+}

The above code can also be made more compact by using guard(rcu)()
instead of rcu_read_lock() + rcu_read_unlock().

+/*
+ * Get a reference on the write plug for the zone containing @sector.
+ * If the plug does not exist, it is allocated and hashed.
+ * Return a pointer to the zone write plug with the plug spinlock held.
+ */

Holding a spinlock upon return is not my favorite approach. Has the
following alternative been considered?
- Remove the spinlock flags argument from this function and instead add
  two other arguments: prev_plug_flags and new_plug_flags.
- If a zone plug is found or allocated, copy the existing zone plug
  flags into *prev_plug_flags and set the zone plug flags that have been
  passed in new_plug_flags (logical or).
- From blk_revalidate_zone_cb(), pass 0 as the new_plug_flags argument.
- From blk_zone_wplug_handle_write, pass BLK_ZONE_WPLUG_PLUGGED as the
  new_plug_flags argument.

Thanks,

Bart.

Reply via email to