Add a new block error injection interface that allows injecting specific status codes for specific sector ranges of a block device.
Signed-off-by: Christoph Hellwig <[email protected]>
---
 Documentation/block/error-injection.rst |  59 ++++
 Documentation/block/index.rst           |   1 +
 block/Makefile                          |   1 +
 block/blk-core.c                        |   2 +
 block/blk-sysfs.c                       |   4 +
 block/blk.h                             |  15 +
 block/error-injection.c                 | 359 ++++++++++++++++++++++++
 block/genhd.c                           |   4 +
 include/linux/blkdev.h                  |   5 +
 9 files changed, 450 insertions(+)
 create mode 100644 Documentation/block/error-injection.rst
 create mode 100644 block/error-injection.c

diff --git a/Documentation/block/error-injection.rst b/Documentation/block/error-injection.rst
new file mode 100644
index 000000000000..be87091b5330
--- /dev/null
+++ b/Documentation/block/error-injection.rst
@@ -0,0 +1,59 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================
+Configurable Error Injection
+============================
+
+Overview
+--------
+
+Configurable error injection allows injecting specific block layer status codes
+for ranges of a block device. Errors can be injected unconditionally, or with a
+given probability.
+
+To use configurable error injection, CONFIG_FAIL_MAKE_REQUEST must be enabled.
+
+The only interface is the error_injection debugfs file, which is created for
+each registered gendisk. Writes to this file are used to create or delete rules
+and reads return a list of the current error injection sites.
+
+Options
+-------
+
+The following options specify the operations:
+
+=================== =======================================================
+add                 add a new rule
+removeall           remove all existing rules
+=================== =======================================================
+
+The following options specify the details of the rule for the add operation:
+
+=================== =======================================================
+op=%s               block layer operation this rule applies to, e.g. READ
+                    or WRITE.
+                    Mandatory.
+start=%u            First block layer sector the rule applies to.
+                    Optional, defaults to 0.
+nr_sectors=%u       Number of sectors this rule applies to.
+                    Optional, defaults to the remainder of the device.
+status=%s           Status to return.
+                    Mandatory.
+chance=%u           Only return a failure with a likelihood of 1/chance.
+                    Optional, defaults to 1 (always).
+=================== =======================================================
+
+Example
+-------
+
+Return BLK_STS_IOERR for one in 10 reads of sector 0 of /dev/nvme0n1:
+
+    $ echo 'add,op=READ,start=0,nr_sectors=1,status=IOERR,chance=10' > /sys/kernel/debug/block/nvme0n1/error_injection
+
+Return BLK_STS_MEDIUM for every write to /dev/nvme0n1:
+
+    $ echo 'add,op=WRITE,start=0,status=MEDIUM' > /sys/kernel/debug/block/nvme0n1/error_injection
+
+Remove all rules for /dev/nvme0n1:
+
+    $ echo 'removeall' > /sys/kernel/debug/block/nvme0n1/error_injection
diff --git a/Documentation/block/index.rst b/Documentation/block/index.rst
index 9fea696f9daa..bfa1bbd31ddf 100644
--- a/Documentation/block/index.rst
+++ b/Documentation/block/index.rst
@@ -22,3 +22,4 @@ Block
    switching-sched
    writeback_cache_control
    ublk
+   error-injection
diff --git a/block/Makefile b/block/Makefile
index 7dce2e44276c..d223b6b7d72f 100644
--- a/block/Makefile
+++ b/block/Makefile
@@ -11,6 +11,7 @@ obj-y		:= bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \
 			genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o \
 			disk-events.o blk-ia-ranges.o early-lookup.o
 
+obj-$(CONFIG_FAIL_MAKE_REQUEST) += error-injection.o
 obj-$(CONFIG_BLK_DEV_BSG_COMMON) += bsg.o
 obj-$(CONFIG_BLK_DEV_BSGLIB)	+= bsg-lib.o
 obj-$(CONFIG_BLK_CGROUP)	+= blk-cgroup.o
diff --git a/block/blk-core.c b/block/blk-core.c
index 8bbc03ce924f..04a392849ab0 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -765,6 +765,8 @@ static void __submit_bio_noacct_mq(struct bio *bio)
 void submit_bio_noacct_nocheck(struct bio *bio, bool split)
 {
 	if (unlikely(may_fail_bio(bio))) {
+		if (blk_error_inject(bio))
+			return;
 		if (should_fail_request(bio->bi_iter.bi_size)) {
 			bio_io_error(bio);
 			return;
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index f22c1f253eb3..43f909c7f0c9 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -933,6 +933,8 @@ static void blk_debugfs_remove(struct gendisk *disk)
 
 	blk_debugfs_lock_nomemsave(q);
 	blk_trace_shutdown(q);
+	if (IS_ENABLED(CONFIG_FAIL_MAKE_REQUEST))
+		blk_error_injection_exit(disk);
 	debugfs_remove_recursive(q->debugfs_dir);
 	q->debugfs_dir = NULL;
 	q->sched_debugfs_dir = NULL;
@@ -963,6 +965,8 @@ int blk_register_queue(struct gendisk *disk)
 
 	memflags = blk_debugfs_lock(q);
 	q->debugfs_dir = debugfs_create_dir(disk->disk_name, blk_debugfs_root);
+	if (IS_ENABLED(CONFIG_FAIL_MAKE_REQUEST))
+		blk_error_injection_init(disk);
 	if (queue_is_mq(q))
 		blk_mq_debugfs_register(q);
 	blk_debugfs_unlock(q, memflags);
diff --git a/block/blk.h b/block/blk.h
index 4857b899e2b6..19f925d8f39d 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -781,4 +781,19 @@ static inline void blk_debugfs_unlock(struct request_queue *q,
 	memalloc_noio_restore(memflags);
 }
 
+void blk_error_injection_init(struct gendisk *disk);
+void blk_error_injection_exit(struct gendisk *disk);
+
+bool __blk_error_inject(struct bio *bio);
+static inline bool blk_error_inject(struct bio *bio)
+{
+#ifdef CONFIG_FAIL_MAKE_REQUEST
+	struct gendisk *disk = bio->bi_bdev->bd_disk;
+
+	if (!list_empty_careful(&disk->error_injection_list))
+		return __blk_error_inject(bio);
+#endif
+	return false;
+}
+
 #endif /* BLK_INTERNAL_H */
diff --git a/block/error-injection.c b/block/error-injection.c
new file mode 100644
index 000000000000..dc0420c4eb58
--- /dev/null
+++ b/block/error-injection.c
@@ -0,0 +1,359 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2026 Christoph Hellwig.
+ */
+#include <linux/debugfs.h>
+#include <linux/blkdev.h>
+#include <linux/err.h>
+#include <linux/parser.h>
+#include <linux/seq_file.h>
+#include "blk.h"
+
+/*
+ * A single error injection rule. Rules are linked into
+ * disk->error_injection_list, which is modified under
+ * disk->error_injection_lock and walked under RCU.
+ */
+struct blk_error_inject {
+	struct list_head	entry;
+	sector_t		start;	/* first sector covered */
+	sector_t		end;	/* last sector covered, inclusive */
+	enum req_op		op;
+	blk_status_t		status;
+
+	/* only inject every 1 / chance times */
+	unsigned int		chance;
+};
+
+/*
+ * Complete @bio with the status of the first rule that matches its operation
+ * and overlaps its sector range.
+ *
+ * Returns true if @bio was completed here, or false if no rule fired.
+ */
+bool __blk_error_inject(struct bio *bio)
+{
+	struct gendisk *disk = bio->bi_bdev->bd_disk;
+	sector_t first = bio->bi_iter.bi_sector;
+	sector_t last = first;
+	struct blk_error_inject *inj;
+
+	/*
+	 * inj->end is inclusive while bio_end_sector() is exclusive. Bios
+	 * without data are treated as covering only their start sector.
+	 */
+	if (bio_sectors(bio))
+		last = bio_end_sector(bio) - 1;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(inj, &disk->error_injection_list, entry) {
+		blk_status_t status;
+
+		if (bio_op(bio) != inj->op)
+			continue;
+		if (first > inj->end || last < inj->start)
+			continue;
+		if (inj->chance > 1 &&
+		    (get_random_u32() % inj->chance) != 0)
+			continue;
+
+		/* Don't touch @inj after leaving the RCU read side section. */
+		status = inj->status;
+		rcu_read_unlock();
+
+		pr_info_ratelimited("%pg: injecting %s error for %s at sector %llu:%u\n",
+			disk->part0,
+			blk_status_to_str(status),
+			blk_op_str(bio_op(bio)),
+			bio->bi_iter.bi_sector,
+			bio_sectors(bio));
+		bio_endio_status(bio, status);
+		return true;
+	}
+	rcu_read_unlock();
+	return false;
+}
+
+/*
+ * Add a rule that fails @op with @status for @nr_sectors sectors starting at
+ * @start. A @nr_sectors of 0 extends the rule up to the highest sector.
+ *
+ * Returns 0 on success, -EINVAL for invalid arguments or if @disk is not live,
+ * or -ENOMEM if the rule could not be allocated.
+ */
+static int error_inject_add(struct gendisk *disk, enum req_op op,
+		sector_t start, u64 nr_sectors, blk_status_t status,
+		unsigned int chance)
+{
+	struct blk_error_inject *inj;
+
+	if (op == REQ_OP_LAST)
+		return -EINVAL;
+	if (status == BLK_STS_OK)
+		return -EINVAL;
+	if (U64_MAX - nr_sectors < start)
+		return -EINVAL;
+
+	inj = kzalloc_obj(*inj);
+	if (!inj)
+		return -ENOMEM;
+
+	pr_debug_ratelimited("%pg: adding %s injection for %s at sector %llu:%llu\n",
+		disk->part0, blk_status_to_str(status),
+		blk_op_str(op),
+		start, nr_sectors);
+
+	inj->op = op;
+	inj->start = start;
+	/* "start + U64_MAX - 1" would wrap around for any non-zero start. */
+	inj->end = nr_sectors ? start + nr_sectors - 1 : U64_MAX;
+	inj->status = status;
+	inj->chance = chance;
+
+	/*
+	 * Add to the front of the list so that newer entries can partially
+	 * override other entries. This also intentionally allows duplicate
+	 * entries as there is no real reason to reject them.
+	 *
+	 * The flag is set under the lock so that a concurrent removeall can't
+	 * clear it after the new rule has been added.
+	 */
+	mutex_lock(&disk->error_injection_lock);
+	if (!disk_live(disk)) {
+		mutex_unlock(&disk->error_injection_lock);
+		kfree(inj);
+		return -EINVAL;
+	}
+	list_add_rcu(&inj->entry, &disk->error_injection_list);
+	bdev_set_flag(disk->part0, BD_MAKE_IT_FAIL);
+	mutex_unlock(&disk->error_injection_lock);
+	return 0;
+}
+
+/*
+ * Remove and free all rules of @disk.
+ *
+ * NOTE(review): this clears BD_MAKE_IT_FAIL unconditionally; confirm that no
+ * other user of the flag needs it to stay set.
+ */
+static void error_inject_removeall(struct gendisk *disk)
+{
+	struct blk_error_inject *inj;
+
+	mutex_lock(&disk->error_injection_lock);
+	while ((inj = list_first_entry_or_null(&disk->error_injection_list,
+			struct blk_error_inject, entry))) {
+		list_del_rcu(&inj->entry);
+		mutex_unlock(&disk->error_injection_lock);
+
+		kfree_rcu_mightsleep(inj);
+
+		mutex_lock(&disk->error_injection_lock);
+	}
+
+	/* The list is empty and can't be refilled until the lock is dropped. */
+	bdev_clear_flag(disk->part0, BD_MAKE_IT_FAIL);
+	mutex_unlock(&disk->error_injection_lock);
+}
+
+enum options {
+	Opt_add			= (1u << 0),
+	Opt_removeall		= (1u << 1),
+
+	Opt_op			= (1u << 16),
+	Opt_start		= (1u << 17),
+	Opt_nr_sectors		= (1u << 18),
+	Opt_status		= (1u << 19),
+	Opt_chance		= (1u << 20),
+
+	Opt_invalid,
+};
+
+static const match_table_t opt_tokens = {
+	{ Opt_add,		"add", },
+	{ Opt_removeall,	"removeall", },
+	{ Opt_op,		"op=%s", },
+	{ Opt_start,		"start=%u" },
+	{ Opt_nr_sectors,	"nr_sectors=%u" },
+	{ Opt_status,		"status=%s" },
+	{ Opt_chance,		"chance=%u" },
+	{ Opt_invalid,		NULL, },
+};
+
+/*
+ * Parse the operation name in @args into @op. A result of REQ_OP_LAST is only
+ * warned about here and rejected later by error_inject_add().
+ */
+static int match_op(substring_t *args, enum req_op *op)
+{
+	const char *tag;
+
+	tag = match_strdup(args);
+	if (!tag)
+		return -ENOMEM;
+	*op = str_to_blk_op(tag);
+	if (*op == REQ_OP_LAST)
+		pr_warn("invalid op '%s'\n", tag);
+	kfree(tag);
+	return 0;
+}
+
+/*
+ * Parse the status tag in @args into @status. A zero result is only warned
+ * about here; error_inject_add() rejects BLK_STS_OK later.
+ */
+static int match_status(substring_t *args, blk_status_t *status)
+{
+	const char *tag;
+
+	tag = match_strdup(args);
+	if (!tag)
+		return -ENOMEM;
+	*status = tag_to_blk_status(tag);
+	if (!*status)
+		pr_warn("invalid status '%s'\n", tag);
+	kfree(tag);
+	return 0;
+}
+
+/*
+ * Parse a comma or newline separated list of options written to the
+ * error_injection debugfs file and execute the requested operation.
+ *
+ * Returns @count on success or a negative errno.
+ */
+static ssize_t blk_error_injection_write(struct file *file,
+		const char __user *ubuf, size_t count, loff_t *pos)
+{
+	struct gendisk *disk = file_inode(file)->i_private;
+	enum { Unset, Add, Removeall } action = Unset;
+	unsigned int option_mask = 0, chance = 1;
+	enum req_op op = REQ_OP_LAST;
+	u64 start = 0, nr_sectors = 0;
+	blk_status_t status = BLK_STS_OK;
+	substring_t args[MAX_OPT_ARGS];
+	char *options, *o, *p;
+	ssize_t ret = 0;
+	int token;
+
+	/* memdup_user_nul() returns an ERR_PTR() on failure, never NULL. */
+	options = memdup_user_nul(ubuf, count);
+	if (IS_ERR(options))
+		return PTR_ERR(options);
+
+	o = options;
+	while ((p = strsep(&o, ",\n")) != NULL) {
+		if (!*p)
+			continue;
+		token = match_token(p, opt_tokens, args);
+		option_mask |= token;
+		switch (token) {
+		case Opt_add:
+			if (action == Unset)
+				action = Add;
+			else
+				ret = -EINVAL;
+			break;
+		case Opt_removeall:
+			if (action == Unset)
+				action = Removeall;
+			else
+				ret = -EINVAL;
+			break;
+		case Opt_op:
+			ret = match_op(args, &op);
+			break;
+		case Opt_start:
+			ret = match_u64(args, &start);
+			break;
+		case Opt_nr_sectors:
+			ret = match_u64(args, &nr_sectors);
+			break;
+		case Opt_status:
+			ret = match_status(args, &status);
+			break;
+		case Opt_chance:
+			ret = match_uint(args, &chance);
+			if (!ret && chance == 0)
+				ret = -EINVAL;
+			break;
+		default:
+			pr_warn("unknown parameter or missing value '%s'\n", p);
+			ret = -EINVAL;
+			break;
+		}
+		if (ret)
+			goto out_free_options;
+	}
+
+	switch (action) {
+	case Add:
+		ret = error_inject_add(disk, op, start, nr_sectors, status,
+				chance);
+		break;
+	case Removeall:
+		/* removeall does not take any further options */
+		if (option_mask & ~Opt_removeall) {
+			ret = -EINVAL;
+			break;
+		}
+		error_inject_removeall(disk);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	if (!ret)
+		ret = count;
+out_free_options:
+	kfree(options);
+	return ret;
+}
+
+/* Print one line per rule for reads of the error_injection debugfs file. */
+static int blk_error_injection_show(struct seq_file *s, void *private)
+{
+	struct gendisk *disk = s->private;
+	struct blk_error_inject *inj;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(inj, &disk->error_injection_list, entry) {
+		seq_printf(s, "%llu:%llu op=%s,status=%s,chance=%u\n",
+			   inj->start, inj->end, blk_op_str(inj->op),
+			   blk_status_to_tag(inj->status), inj->chance);
+	}
+	rcu_read_unlock();
+	return 0;
+}
+
+static int blk_error_injection_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, blk_error_injection_show, inode->i_private);
+}
+
+static int blk_error_injection_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file);
+}
+
+static const struct file_operations blk_error_injection_fops = {
+	.owner		= THIS_MODULE,
+	.write		= blk_error_injection_write,
+	.read		= seq_read,
+	.open		= blk_error_injection_open,
+	.release	= blk_error_injection_release,
+};
+
+/* Create the error_injection debugfs file for @disk. */
+void blk_error_injection_init(struct gendisk *disk)
+{
+	debugfs_create_file("error_injection", 0600, disk->queue->debugfs_dir,
+			    disk, &blk_error_injection_fops);
+}
+
+/* Drop all rules of @disk; called when its debugfs directory is removed. */
+void blk_error_injection_exit(struct gendisk *disk)
+{
+	error_inject_removeall(disk);
+}
diff --git a/block/genhd.c b/block/genhd.c
index 7d6854fd28e9..30f42461d895 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1485,6 +1485,10 @@ struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id,
 	lockdep_init_map(&disk->lockdep_map, "(bio completion)", lkclass, 0);
 #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED
 	INIT_LIST_HEAD(&disk->slave_bdevs);
+#endif
+#ifdef CONFIG_FAIL_MAKE_REQUEST
+	mutex_init(&disk->error_injection_lock);
+	INIT_LIST_HEAD(&disk->error_injection_list);
 #endif
 	mutex_init(&disk->rqos_state_mutex);
 	kobject_init(&disk->queue_kobj, &blk_queue_ktype);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 17270a28c66d..8743ad616b7f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -227,6 +227,11 @@ struct gendisk {
 	 */
 	struct blk_independent_access_ranges *ia_ranges;
 
+#ifdef CONFIG_FAIL_MAKE_REQUEST
+	struct mutex error_injection_lock;
+	struct list_head error_injection_list;
+#endif
+
 	struct mutex rqos_state_mutex;	/* rqos state change mutex */
 };
 
-- 
2.53.0
