Calling verity_verify_io in bh for IO of all sizes is not suitable for
embedded devices. From our tests, bh verification can improve the performance
of 4K synchronous random reads.
For example:
./fio --name=rand_read --ioengine=psync --rw=randread --bs=4K \
 --direct=1 --numjobs=8 --runtime=60 --time_based --group_reporting \
 --filename=/dev/block/mapper/xx-verity

But it will degrade the performance of 512K synchronous sequential reads
on our devices.
For example:
./fio --name=read --ioengine=psync --rw=read --bs=512K --direct=1 \
 --numjobs=8 --runtime=60 --time_based --group_reporting \
 --filename=/dev/block/mapper/xx-verity

This change introduces a parameter array, and users can modify the default
configuration via /sys/module/dm_verity/parameters/use_bh_bytes.

The default limit for NONE/RT/BE is set to 8192 bytes.
The default limit for IDLE is set to 0, so bh verification is effectively
disabled for idle-priority IO.
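The limits can be read and updated at runtime through the same parameter file,
for example (the values shown assume the defaults above):
 cat /sys/module/dm_verity/parameters/use_bh_bytes
 8192,8192,8192,0
 echo 8192,8192,8192,8192 > /sys/module/dm_verity/parameters/use_bh_bytes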

Call verity_verify_io directly when verity_end_io is not in hardirq.

Signed-off-by: LongPing Wei <weilongp...@oppo.com>
---
v3:
1. set the default limit to 8192 bytes for none, rt and be;
2. set the default limit to 0 bytes for idle;
3. fix the mistake in v2 where verity_verify_io would not be called when
verity_use_bh returned false;
v2:
1. change from use_bh_blocks to use_bh_bytes;
2. update documentation;
3. set the default limit to 4096 bytes;
4. call verity_verify_io directly when verity_end_io is in softirq;
5. bump version of dm-verity;
---
 .../admin-guide/device-mapper/verity.rst      |  8 ++++-
 drivers/md/dm-verity-target.c                 | 30 ++++++++++++++++---
 2 files changed, 33 insertions(+), 5 deletions(-)

diff --git a/Documentation/admin-guide/device-mapper/verity.rst b/Documentation/admin-guide/device-mapper/verity.rst
index a65c1602cb23..f7fb5ea3c0df 100644
--- a/Documentation/admin-guide/device-mapper/verity.rst
+++ b/Documentation/admin-guide/device-mapper/verity.rst
@@ -143,7 +143,13 @@ root_hash_sig_key_desc <key_description>
 
 try_verify_in_tasklet
     If verity hashes are in cache, verify data blocks in kernel tasklet instead
-    of workqueue. This option can reduce IO latency.
+    of workqueue. This option can reduce IO latency. The size limits can be
+    configured via /sys/module/dm_verity/parameters/use_bh_bytes. The four
+    parameters correspond to the limits for IOPRIO_CLASS_NONE, IOPRIO_CLASS_RT,
+    IOPRIO_CLASS_BE and IOPRIO_CLASS_IDLE, in that order.
+    For example:
+    <none>,<rt>,<be>,<idle>
+    4096,4096,4096,4096
 
 Theory of operation
 ===================
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index e86c1431b108..4d6148ed02e8 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -30,6 +30,7 @@
 #define DM_VERITY_ENV_VAR_NAME         "DM_VERITY_ERR_BLOCK_NR"
 
 #define DM_VERITY_DEFAULT_PREFETCH_SIZE        262144
+#define DM_VERITY_USE_BH_DEFAULT_BYTES 8192
 
 #define DM_VERITY_MAX_CORRUPTED_ERRS   100
 
@@ -49,6 +50,15 @@ static unsigned int dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE
 
 module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, 0644);
 
+static unsigned int dm_verity_use_bh_bytes[4] = {
+       DM_VERITY_USE_BH_DEFAULT_BYTES, // IOPRIO_CLASS_NONE
+       DM_VERITY_USE_BH_DEFAULT_BYTES, // IOPRIO_CLASS_RT
+       DM_VERITY_USE_BH_DEFAULT_BYTES, // IOPRIO_CLASS_BE
+       0                               // IOPRIO_CLASS_IDLE
+};
+
+module_param_array_named(use_bh_bytes, dm_verity_use_bh_bytes, uint, NULL, 0644);
+
 static DEFINE_STATIC_KEY_FALSE(use_bh_wq_enabled);
 
 /* Is at least one dm-verity instance using ahash_tfm instead of shash_tfm? */
@@ -652,9 +662,16 @@ static void verity_bh_work(struct work_struct *w)
        verity_finish_io(io, errno_to_blk_status(err));
 }
 
+static inline bool verity_use_bh(unsigned int bytes, unsigned short ioprio)
+{
+       return ioprio <= IOPRIO_CLASS_IDLE &&
+               bytes <= READ_ONCE(dm_verity_use_bh_bytes[ioprio]);
+}
+
 static void verity_end_io(struct bio *bio)
 {
        struct dm_verity_io *io = bio->bi_private;
+       unsigned short ioprio = IOPRIO_PRIO_CLASS(bio->bi_ioprio);
 
        if (bio->bi_status &&
            (!verity_fec_is_enabled(io->v) ||
@@ -664,9 +681,14 @@ static void verity_end_io(struct bio *bio)
                return;
        }
 
-       if (static_branch_unlikely(&use_bh_wq_enabled) && io->v->use_bh_wq) {
-               INIT_WORK(&io->bh_work, verity_bh_work);
-               queue_work(system_bh_wq, &io->bh_work);
+       if (static_branch_unlikely(&use_bh_wq_enabled) && io->v->use_bh_wq &&
+               verity_use_bh(bio->bi_iter.bi_size, ioprio)) {
+               if (in_hardirq() || irqs_disabled()) {
+                       INIT_WORK(&io->bh_work, verity_bh_work);
+                       queue_work(system_bh_wq, &io->bh_work);
+               } else {
+                       verity_bh_work(&io->bh_work);
+               }
        } else {
                INIT_WORK(&io->work, verity_work);
                queue_work(io->v->verify_wq, &io->work);
@@ -1761,7 +1783,7 @@ static struct target_type verity_target = {
        .name           = "verity",
 /* Note: the LSMs depend on the singleton and immutable features */
        .features       = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE,
-       .version        = {1, 10, 0},
+       .version        = {1, 11, 0},
        .module         = THIS_MODULE,
        .ctr            = verity_ctr,
        .dtr            = verity_dtr,
-- 
2.34.1

