Switch from early bio-based IO accounting (at the time DM clones each
incoming bio) to late IO accounting just before each remapped bio is
issued to the underlying device via submit_bio_noacct().

This allows more precise bio-based IO accounting for DM targets that
use their own workqueues to perform additional processing of each bio
in conjunction with returning DM_MAPIO_SUBMITTED from their map
function.

Signed-off-by: Mike Snitzer <[email protected]>
---
 drivers/md/dm-core.h          |  1 +
 drivers/md/dm.c               | 93 +++++++++++++++++++++++++++++++++++++++----
 include/linux/device-mapper.h |  7 ++++
 3 files changed, 93 insertions(+), 8 deletions(-)

diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 8dd196aec130..3ecd6f294f53 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -230,6 +230,7 @@ struct dm_io {
        atomic_t io_count;
        struct bio *orig_bio;
        unsigned long start_time;
+       unsigned long io_acct_time;
        spinlock_t lock;
        struct dm_stats_aux stats_aux;
        /* last member of dm_target_io is 'struct bio' */
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 8c0e96b8e1a5..ad512f40716e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -485,21 +485,54 @@ u64 dm_start_time_ns_from_clone(struct bio *bio)
 }
 EXPORT_SYMBOL_GPL(dm_start_time_ns_from_clone);
 
-static void start_io_acct(struct dm_io *io)
+static void __start_io_acct(struct dm_io *io, struct bio *bio)
 {
-       struct bio *bio = io->orig_bio;
+       unsigned long flags;
 
-       bio_start_io_acct_time(bio, io->start_time);
+       /* Ensure IO accounting is only ever started once */
+       spin_lock_irqsave(&io->lock, flags);
+       if (smp_load_acquire(&io->io_acct_time)) {
+               spin_unlock_irqrestore(&io->lock, flags);
+               return;
+       }
+       smp_store_release(&io->io_acct_time, jiffies);
+       spin_unlock_irqrestore(&io->lock, flags);
 
+       bio_start_io_acct_time(bio, io->start_time);
        if (unlikely(dm_stats_used(&io->md->stats)))
                dm_stats_account_io(&io->md->stats, bio_data_dir(bio),
                                    bio->bi_iter.bi_sector, bio_sectors(bio),
                                    false, 0, &io->stats_aux);
 }
 
+static void start_io_acct(struct dm_io *io, struct bio *bio)
+{
+       /* Only start_io_acct() once for this IO */
+       if (smp_load_acquire(&io->io_acct_time))
+               return;
+
+       __start_io_acct(io, bio);
+}
+
+static void clone_and_start_io_acct(struct dm_io *io, struct bio *bio)
+{
+       struct bio io_acct_clone;
+
+       /* Only clone_and_start_io_acct() once for this IO */
+       if (smp_load_acquire(&io->io_acct_time))
+               return;
+
+       bio_init_clone(io->orig_bio->bi_bdev,
+                      &io_acct_clone, bio, GFP_NOIO);
+       __start_io_acct(io, &io_acct_clone);
+}
+
 static void end_io_acct(struct mapped_device *md, struct bio *bio,
                        unsigned long start_time, struct dm_stats_aux *stats_aux)
 {
+       if (!start_time)
+               return;
+
        bio_end_io_acct(bio, start_time);
 
        if (unlikely(dm_stats_used(&md->stats)))
@@ -529,6 +562,7 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
        spin_lock_init(&io->lock);
 
        io->start_time = jiffies;
+       io->io_acct_time = 0;
 
        return io;
 }
@@ -818,7 +852,8 @@ void dm_io_dec_pending(struct dm_io *io, blk_status_t error)
                }
 
                io_error = io->status;
-               start_time = io->start_time;
+               if (io->io_acct_time)
+                       start_time = io->start_time;
                stats_aux = io->stats_aux;
                free_io(io);
                end_io_acct(md, bio, start_time, &stats_aux);
@@ -1099,6 +1134,43 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors)
 }
 EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
 
+/*
+ * @clone: clone bio that DM core passed to target's .map function
+ * @tgt_clone: bio that target needs to submit (after DM_MAPIO_SUBMITTED)
+ *
+ * Targets should use this interface to submit bios they take
+ * ownership of when returning DM_MAPIO_SUBMITTED.
+ *
+ * Target should also enable ti->accounts_remapped_io
+ */
+void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone)
+{
+       struct dm_target_io *tio = clone_to_tio(clone);
+       struct dm_io *io = tio->io;
+       struct block_device *clone_bdev = clone->bi_bdev;
+
+       /* establish bio that will get submitted */
+       if (!tgt_clone)
+               tgt_clone = clone;
+
+       /*
+        * account IO to DM device in terms of clone's
+        * payload to avoid concern about late bio splitting.
+        * - clone will reflect any dm_accept_partial_bio()
+        * - any bio splitting is ultimately reflected in
+        *   io->orig_bio so there is no IO imbalance in
+        *   end_io_acct().
+        */
+       clone->bi_bdev = io->orig_bio->bi_bdev;
+       start_io_acct(io, clone);
+       clone->bi_bdev = clone_bdev;
+
+       trace_block_bio_remap(tgt_clone, bio_dev(io->orig_bio),
+                             tio->old_sector);
+       submit_bio_noacct(tgt_clone);
+}
+EXPORT_SYMBOL_GPL(dm_submit_bio_remap);
+
 static noinline void __set_swap_bios_limit(struct mapped_device *md, int latch)
 {
        mutex_lock(&md->swap_bios_lock);
@@ -1151,12 +1223,18 @@ static void __map_bio(struct bio *clone)
        switch (r) {
        case DM_MAPIO_SUBMITTED:
                /* target has assumed ownership of this io */
+               if (!ti->accounts_remapped_io) {
+                       /*
+                        * Any split isn't reflected in io->orig_bio yet. And bio
+                        * cannot be modified because target is submitting it.
+                        * Clone bio and account IO to DM device.
+                        */
+                       clone_and_start_io_acct(io, clone);
+               }
                break;
        case DM_MAPIO_REMAPPED:
                /* the bio has been remapped so dispatch it */
-               trace_block_bio_remap(clone, bio_dev(io->orig_bio),
-                                     tio->old_sector);
-               submit_bio_noacct(clone);
+               dm_submit_bio_remap(clone, NULL);
                break;
        case DM_MAPIO_KILL:
        case DM_MAPIO_REQUEUE:
@@ -1403,7 +1481,6 @@ static void dm_split_and_process_bio(struct mapped_device *md,
                submit_bio_noacct(bio);
        }
 out:
-       start_io_acct(ci.io);
        /* drop the extra reference count */
        dm_io_dec_pending(ci.io, errno_to_blk_status(error));
 }
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index b26fecf6c8e8..a3e397155bc9 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -362,6 +362,12 @@ struct dm_target {
         * zone append operations using regular writes.
         */
        bool emulate_zone_append:1;
+
+       /*
+        * Set if the target will submit IO using dm_submit_bio_remap()
+        * after returning DM_MAPIO_SUBMITTED from its map function.
+        */
+       bool accounts_remapped_io:1;
 };
 
 void *dm_per_bio_data(struct bio *bio, size_t data_size);
@@ -465,6 +471,7 @@ int dm_suspended(struct dm_target *ti);
 int dm_post_suspending(struct dm_target *ti);
 int dm_noflush_suspending(struct dm_target *ti);
 void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors);
+void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone);
 union map_info *dm_get_rq_mapinfo(struct request *rq);
 
 #ifdef CONFIG_BLK_DEV_ZONED
-- 
2.15.0

--
dm-devel mailing list
[email protected]
https://listman.redhat.com/mailman/listinfo/dm-devel

Reply via email to