Calling verity_verify_io in bh for IO of all sizes is not suitable for embedded devices. From our tests, it can improve the performance of 4K synchronise random reads. For example: ./fio --name=rand_read --ioengine=psync --rw=randread --bs=4K \ --direct=1 --numjobs=8 --runtime=60 --time_based --group_reporting \ --filename=/dev/block/mapper/xx-verity
But it will degrade the performance of 512K synchronise sequential reads on our devices. For example: ./fio --name=read --ioengine=psync --rw=read --bs=512K --direct=1 \ --numjobs=8 --runtime=60 --time_based --group_reporting \ --filename=/dev/block/mapper/xx-verity A parameter array is introduced by this change. And users can modify the default config by /sys/module/dm_verity/parameters/use_bh_bytes. The default limits for NONE/RT/BE is set to 8192. The default limits for IDLE is set to 0. Call verity_verify_io directly when verity_end_io is not in hardirq. Signed-off-by: LongPing Wei <weilongp...@oppo.com> --- v3: 1. set the default limit to 8192 bytes for none,rt,be; 2. set the default limit to 0 bytes for idle; 3. fix the mistake in v2 that verity_verify_io won't be called when verity_use_bh return false; v2: 1. change from use_bh_blocks to use_bh_bytes; 2. update documention; 3. set the default limit to 4096bytes; 4. call verity_verify_io directly when verity_end_io is in softirq; 5. bump version of dm-verity; --- .../admin-guide/device-mapper/verity.rst | 8 ++++- drivers/md/dm-verity-target.c | 30 ++++++++++++++++--- 2 files changed, 33 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/device-mapper/verity.rst b/Documentation/admin-guide/device-mapper/verity.rst index a65c1602cb23..f7fb5ea3c0df 100644 --- a/Documentation/admin-guide/device-mapper/verity.rst +++ b/Documentation/admin-guide/device-mapper/verity.rst @@ -143,7 +143,13 @@ root_hash_sig_key_desc <key_description> try_verify_in_tasklet If verity hashes are in cache, verify data blocks in kernel tasklet instead - of workqueue. This option can reduce IO latency. + of workqueue. This option can reduce IO latency. The size limits could be + configured via /sys/module/dm_verity/parameters/use_bh_bytes. The four + parameters correspond to limits for IOPRIO_CLASS_NONE,IOPRIO_CLASS_RT, + IOPRIO_CLASS_BE and IOPRIO_CLASS_IDLE in turn. + For example: + <none>,<rt>,<be>,<idle> + 4096,4096,4096,4096 Theory of operation =================== diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c index e86c1431b108..4d6148ed02e8 100644 --- a/drivers/md/dm-verity-target.c +++ b/drivers/md/dm-verity-target.c @@ -30,6 +30,7 @@ #define DM_VERITY_ENV_VAR_NAME "DM_VERITY_ERR_BLOCK_NR" #define DM_VERITY_DEFAULT_PREFETCH_SIZE 262144 +#define DM_VERITY_USE_BH_DEFAULT_BYTES 8192 #define DM_VERITY_MAX_CORRUPTED_ERRS 100 @@ -49,6 +50,15 @@ static unsigned int dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE module_param_named(prefetch_cluster, dm_verity_prefetch_cluster, uint, 0644); +static unsigned int dm_verity_use_bh_bytes[4] = { + DM_VERITY_USE_BH_DEFAULT_BYTES, // IOPRIO_CLASS_NONE + DM_VERITY_USE_BH_DEFAULT_BYTES, // IOPRIO_CLASS_RT + DM_VERITY_USE_BH_DEFAULT_BYTES, // IOPRIO_CLASS_BE + 0 // IOPRIO_CLASS_IDLE +}; + +module_param_array_named(use_bh_bytes, dm_verity_use_bh_bytes, uint, NULL, 0644); + static DEFINE_STATIC_KEY_FALSE(use_bh_wq_enabled); /* Is at least one dm-verity instance using ahash_tfm instead of shash_tfm? */ @@ -652,9 +662,16 @@ static void verity_bh_work(struct work_struct *w) verity_finish_io(io, errno_to_blk_status(err)); } +static inline bool verity_use_bh(unsigned int bytes, unsigned short ioprio) +{ + return ioprio <= IOPRIO_CLASS_IDLE && + bytes <= READ_ONCE(dm_verity_use_bh_bytes[ioprio]); +} + static void verity_end_io(struct bio *bio) { struct dm_verity_io *io = bio->bi_private; + unsigned short ioprio = IOPRIO_PRIO_CLASS(bio->bi_ioprio); if (bio->bi_status && (!verity_fec_is_enabled(io->v) || @@ -664,9 +681,14 @@ static void verity_end_io(struct bio *bio) return; } - if (static_branch_unlikely(&use_bh_wq_enabled) && io->v->use_bh_wq) { - INIT_WORK(&io->bh_work, verity_bh_work); - queue_work(system_bh_wq, &io->bh_work); + if (static_branch_unlikely(&use_bh_wq_enabled) && io->v->use_bh_wq && + verity_use_bh(bio->bi_iter.bi_size, ioprio)) { + if (in_hardirq() || irqs_disabled()) { + INIT_WORK(&io->bh_work, verity_bh_work); + queue_work(system_bh_wq, &io->bh_work); + } else { + verity_bh_work(&io->bh_work); + } } else { INIT_WORK(&io->work, verity_work); queue_work(io->v->verify_wq, &io->work); @@ -1761,7 +1783,7 @@ static struct target_type verity_target = { .name = "verity", /* Note: the LSMs depend on the singleton and immutable features */ .features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE, - .version = {1, 10, 0}, + .version = {1, 11, 0}, .module = THIS_MODULE, .ctr = verity_ctr, .dtr = verity_dtr, -- 2.34.1