null_blk: add weighted round robin submission queue support

Add a new irqmode, NULL_IRQ_WRR, in which requests are completed by a
per-device kernel thread instead of inline or from an hrtimer. New
module parameters (submit_queues_read, submit_queues_poll and
submit_queues_wrr_{low,medium,high}) split the submission queues among
the blk-mq hctx types, and the thread services the resulting queue
maps in weighted round robin order, wrr_high:wrr_medium:wrr_low =
8:4:1.

Signed-off-by: Weiping Zhang <[email protected]>
---
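Example invocation (parameter values are illustrative; queue_mode=2
selects blk-mq and irqmode=3 selects NULL_IRQ_WRR):

    modprobe null_blk queue_mode=2 irqmode=3 submit_queues=8 \
        submit_queues_read=1 submit_queues_wrr_low=2 \
        submit_queues_wrr_medium=2 submit_queues_wrr_high=2
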
drivers/block/null_blk.h | 7 +
drivers/block/null_blk_main.c | 294 ++++++++++++++++++++++++++++++++++++++++--
2 files changed, 288 insertions(+), 13 deletions(-)
diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h
index 34b22d6523ba..aa53c4b6de49 100644
--- a/drivers/block/null_blk.h
+++ b/drivers/block/null_blk.h
@@ -12,6 +12,7 @@
struct nullb_cmd {
struct list_head list;
+ struct list_head wrr_node;
struct llist_node ll_list;
struct __call_single_data csd;
struct request *rq;
@@ -23,6 +24,8 @@ struct nullb_cmd {
};
struct nullb_queue {
+ spinlock_t wrr_lock;
+ struct list_head wrr_head;
unsigned long *tag_map;
wait_queue_head_t wait;
unsigned int queue_depth;
@@ -83,6 +86,10 @@ struct nullb {
struct nullb_queue *queues;
unsigned int nr_queues;
char disk_name[DISK_NAME_LEN];
+
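+ /* WRR completion thread and the number of commands awaiting it */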
+ struct task_struct *wrr_thread;
+ atomic_long_t wrrd_inflight;
+ wait_queue_head_t wrrd_wait;
};
#ifdef CONFIG_BLK_DEV_ZONED
diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c
index 447d635c79a2..100fc0e13036 100644
--- a/drivers/block/null_blk_main.c
+++ b/drivers/block/null_blk_main.c
@@ -4,6 +4,8 @@
* Shaohua Li <[email protected]>
*/
#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/blk-cgroup.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>
@@ -75,6 +77,7 @@ enum {
NULL_IRQ_NONE = 0,
NULL_IRQ_SOFTIRQ = 1,
NULL_IRQ_TIMER = 2,
+ NULL_IRQ_WRR = 3,
};
enum {
@@ -87,10 +90,23 @@ static int g_no_sched;
module_param_named(no_sched, g_no_sched, int, 0444);
MODULE_PARM_DESC(no_sched, "No io scheduler");
+static int g_tagset_nr_maps = 1;
static int g_submit_queues = 1;
module_param_named(submit_queues, g_submit_queues, int, 0444);
MODULE_PARM_DESC(submit_queues, "Number of submission queues");
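+
+/*
+ * One module parameter per hctx type, so the submission queues can be
+ * split among default/read/poll/wrr_low/wrr_medium/wrr_high.
+ */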
+#define NULLB_SUBMIT_QUEUE(attr, count) \
+static int g_submit_queues_##attr = count; \
+module_param_named(submit_queues_##attr, g_submit_queues_##attr, int, 0444); \
+MODULE_PARM_DESC(submit_queues_##attr, "Number of " #attr " submission queues");
+
+NULLB_SUBMIT_QUEUE(default, 1)
+NULLB_SUBMIT_QUEUE(read, 0)
+NULLB_SUBMIT_QUEUE(poll, 0)
+NULLB_SUBMIT_QUEUE(wrr_low, 0)
+NULLB_SUBMIT_QUEUE(wrr_medium, 0)
+NULLB_SUBMIT_QUEUE(wrr_high, 0)
+
static int g_home_node = NUMA_NO_NODE;
module_param_named(home_node, g_home_node, int, 0444);
MODULE_PARM_DESC(home_node, "Home node for the device");
@@ -158,7 +174,7 @@ static int g_irqmode = NULL_IRQ_SOFTIRQ;
static int null_set_irqmode(const char *str, const struct kernel_param *kp)
{
return null_param_store_val(str, &g_irqmode, NULL_IRQ_NONE,
- NULL_IRQ_TIMER);
+ NULL_IRQ_WRR);
}
static const struct kernel_param_ops null_irqmode_param_ops = {
@@ -643,6 +659,22 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd)
hrtimer_start(&cmd->timer, kt, HRTIMER_MODE_REL);
}
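+
+/*
+ * Park a finished command on its queue's WRR list; the per-device WRR
+ * thread will complete it later. Wake the thread when the inflight
+ * count goes from zero to one.
+ */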
+static void null_cmd_wrr_insert(struct nullb_cmd *cmd)
+{
+ struct nullb_queue *nq = cmd->nq;
+ struct nullb *nullb = nq->dev->nullb;
+ unsigned long flags;
+
+ INIT_LIST_HEAD(&cmd->wrr_node);
+ spin_lock_irqsave(&nq->wrr_lock, flags);
+ list_add_tail(&cmd->wrr_node, &nq->wrr_head);
+ spin_unlock_irqrestore(&nq->wrr_lock, flags);
+
+ /* wake up wrr_thread if needed */
+ if (atomic_long_inc_return(&nullb->wrrd_inflight) == 1)
+ wake_up_interruptible(&nullb->wrrd_wait);
+}
+
static void null_complete_rq(struct request *rq)
{
end_cmd(blk_mq_rq_to_pdu(rq));
@@ -1236,6 +1268,9 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
case NULL_IRQ_TIMER:
null_cmd_end_timer(cmd);
break;
+ case NULL_IRQ_WRR:
+ null_cmd_wrr_insert(cmd);
+ break;
}
return BLK_STS_OK;
}
@@ -1351,10 +1386,64 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
return null_handle_cmd(cmd);
}
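+
+/* Submission queue count configured for a given hctx type. */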
+static inline int null_hctx_nr_queue(int type)
+{
+ int ret;
+
+ switch (type) {
+ case HCTX_TYPE_DEFAULT:
+ ret = g_submit_queues_default;
+ break;
+ case HCTX_TYPE_READ:
+ ret = g_submit_queues_read;
+ break;
+ case HCTX_TYPE_POLL:
+ ret = g_submit_queues_poll;
+ break;
+ case HCTX_TYPE_WRR_LOW:
+ ret = g_submit_queues_wrr_low;
+ break;
+ case HCTX_TYPE_WRR_MEDIUM:
+ ret = g_submit_queues_wrr_medium;
+ break;
+ case HCTX_TYPE_WRR_HIGH:
+ ret = g_submit_queues_wrr_high;
+ break;
+ default:
+ ret = 0;
+ break;
+ }
+
+ return ret;
+}
+
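+/*
+ * Lay the per-type queue maps out contiguously over the submission
+ * queues. Only the default map must be non-empty; empty maps are
+ * skipped.
+ */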
+static int null_map_queues(struct blk_mq_tag_set *set)
+{
+ int i, offset;
+
+ for (i = 0, offset = 0; i < set->nr_maps; i++) {
+ struct blk_mq_queue_map *map = &set->map[i];
+
+ map->nr_queues = null_hctx_nr_queue(i);
+
+ if (!map->nr_queues) {
+ BUG_ON(i == HCTX_TYPE_DEFAULT);
+ continue;
+ }
+
+ map->queue_offset = offset;
+ blk_mq_map_queues(map);
+ offset += map->nr_queues;
+ }
+
+ return 0;
+}
+
static const struct blk_mq_ops null_mq_ops = {
.queue_rq = null_queue_rq,
.complete = null_complete_rq,
.timeout = null_timeout_rq,
+ .map_queues = null_map_queues,
};
static void cleanup_queue(struct nullb_queue *nq)
@@ -1397,6 +1486,9 @@ static void null_del_dev(struct nullb *nullb)
cleanup_queues(nullb);
if (null_cache_active(nullb))
null_free_device_storage(nullb->dev, true);
+
+ if (dev->irqmode == NULL_IRQ_WRR)
+ kthread_stop(nullb->wrr_thread);
kfree(nullb);
dev->nullb = NULL;
}
@@ -1435,6 +1527,8 @@ static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
init_waitqueue_head(&nq->wait);
nq->queue_depth = nullb->queue_depth;
nq->dev = nullb->dev;
+ INIT_LIST_HEAD(&nq->wrr_head);
+ spin_lock_init(&nq->wrr_lock);
}
static void null_init_queues(struct nullb *nullb)
@@ -1549,6 +1643,7 @@ static int null_init_tag_set(struct nullb *nullb, struct blk_mq_tag_set *set)
g_submit_queues;
set->queue_depth = nullb ? nullb->dev->hw_queue_depth :
g_hw_queue_depth;
+ set->nr_maps = g_tagset_nr_maps;
set->numa_node = nullb ? nullb->dev->home_node : g_home_node;
set->cmd_size = sizeof(struct nullb_cmd);
set->flags = BLK_MQ_F_SHOULD_MERGE;
@@ -1576,7 +1671,7 @@ static void null_validate_conf(struct nullb_device *dev)
dev->submit_queues = 1;
dev->queue_mode = min_t(unsigned int, dev->queue_mode, NULL_Q_MQ);
- dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_TIMER);
+ dev->irqmode = min_t(unsigned int, dev->irqmode, NULL_IRQ_WRR);
/* Do memory allocation, so set blocking */
if (dev->memory_backed)
@@ -1616,6 +1711,72 @@ static bool null_setup_fault(void)
return true;
}
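+
+/*
+ * Complete up to @batch commands from every hardware queue in @map and
+ * drop the inflight count for each completed command.
+ */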
+static inline void null_wrr_handle_map(struct nullb *nullb,
+ struct blk_mq_queue_map *map, int batch)
+{
+ int i, nr;
+ struct nullb_queue *nq;
+ struct nullb_cmd *cmd, *tmp;
+ unsigned long flags;
+
+ for (i = 0; i < map->nr_queues; i++) {
+ nq = &nullb->queues[i + map->queue_offset];
+ nr = batch;
+ spin_lock_irqsave(&nq->wrr_lock, flags);
+ list_for_each_entry_safe(cmd, tmp, &nq->wrr_head, wrr_node) {
+ list_del(&cmd->wrr_node);
+ blk_mq_end_request(cmd->rq, cmd->error);
+ atomic_long_dec(&nullb->wrrd_inflight);
+ if (--nr == 0)
+ break;
+ }
+ spin_unlock_irqrestore(&nq->wrr_lock, flags);
+ }
+}
+
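+/*
+ * Per-device completion thread: each pass services the queue maps in
+ * weighted round robin order (wrr_high:wrr_medium:wrr_low = 8:4:1,
+ * weight 1 for the remaining types), then sleeps once nothing is
+ * inflight.
+ */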
+static int null_wrr_thread(void *data)
+{
+ struct nullb *nullb = data;
+ struct blk_mq_tag_set *set = nullb->tag_set;
+ struct blk_mq_queue_map *map;
+ DEFINE_WAIT(wait);
+
+ while (1) {
+ if (kthread_should_stop())
+ goto out;
+
+ cond_resched();
+
+ /* handle each hardware queue group in weighted round robin order */
+
+ map = &set->map[HCTX_TYPE_WRR_HIGH];
+ null_wrr_handle_map(nullb, map, 8);
+
+ map = &set->map[HCTX_TYPE_WRR_MEDIUM];
+ null_wrr_handle_map(nullb, map, 4);
+
+ map = &set->map[HCTX_TYPE_WRR_LOW];
+ null_wrr_handle_map(nullb, map, 1);
+
+ map = &set->map[HCTX_TYPE_POLL];
+ null_wrr_handle_map(nullb, map, 1);
+
+ map = &set->map[HCTX_TYPE_READ];
+ null_wrr_handle_map(nullb, map, 1);
+
+ map = &set->map[HCTX_TYPE_DEFAULT];
+ null_wrr_handle_map(nullb, map, 1);
+
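+ /*
+ * prepare_to_wait() before re-checking the inflight count, so a
+ * wake-up from null_cmd_wrr_insert() between the check and
+ * schedule() is not lost.
+ */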
+ prepare_to_wait(&nullb->wrrd_wait, &wait, TASK_INTERRUPTIBLE);
+ if (atomic_long_read(&nullb->wrrd_inflight) == 0)
+ schedule();
+ finish_wait(&nullb->wrrd_wait, &wait);
+ }
+
+out:
+ return 0;
+}
+
static int null_add_dev(struct nullb_device *dev)
{
struct nullb *nullb;
@@ -1706,15 +1867,27 @@ static int null_add_dev(struct nullb_device *dev)
sprintf(nullb->disk_name, "nullb%d", nullb->index);
+ if (dev->irqmode == NULL_IRQ_WRR) {
+ init_waitqueue_head(&nullb->wrrd_wait);
+ atomic_long_set(&nullb->wrrd_inflight, 0);
+ nullb->wrr_thread = kthread_run(null_wrr_thread, nullb,
+ "wrrd_%s", nullb->disk_name);
+ if (IS_ERR(nullb->wrr_thread)) {
+ rv = PTR_ERR(nullb->wrr_thread);
+ goto out_cleanup_zone;
+ }
+ }
+
rv = null_gendisk_register(nullb);
if (rv)
- goto out_cleanup_zone;
+ goto out_cleanup_wrrd;
mutex_lock(&lock);
list_add_tail(&nullb->list, &nullb_list);
mutex_unlock(&lock);
return 0;
+out_cleanup_wrrd:
+ if (dev->irqmode == NULL_IRQ_WRR)
+ kthread_stop(nullb->wrr_thread);
out_cleanup_zone:
if (dev->zoned)
null_zone_exit(dev);
@@ -1731,6 +1904,106 @@ static int null_add_dev(struct nullb_device *dev)
return rv;
}
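+
+/*
+ * Clamp the total submit queue count, then hand queues out to the
+ * read, poll and wrr maps in order, validating each count against
+ * what is left; any remainder goes to the default map.
+ */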
+static int null_verify_queues(void)
+{
+ int queues, nr;
+
+ if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
+ if (g_submit_queues != nr_online_nodes) {
+ pr_warn("null_blk: submit_queues param is set to %u.\n",
+ nr_online_nodes);
+ g_submit_queues = nr_online_nodes;
+ }
+ } else if (g_submit_queues > nr_cpu_ids)
+ g_submit_queues = nr_cpu_ids;
+ else if (g_submit_queues <= 0)
+ g_submit_queues = 1;
+
+ /* leave at least one queue for the default map */
+ g_submit_queues_default = 1;
+ queues = g_submit_queues - 1;
+ if (queues == 0)
+ goto def;
+
+ /* read queues */
+ nr = g_submit_queues_read;
+ if (nr < 0 || nr > queues) {
+ pr_warn("null_blk: invalid read queue count\n");
+ return -EINVAL;
+ }
+ g_tagset_nr_maps++;
+ queues -= nr;
+ if (queues == 0)
+ goto read;
+
+ /* poll queues */
+ nr = g_submit_queues_poll;
+ if (nr < 0 || nr > queues) {
+ pr_warn("null_blk: invalid poll queue count\n");
+ return -EINVAL;
+ }
+ g_tagset_nr_maps++;
+ queues -= nr;
+ if (queues == 0)
+ goto poll;
+
+ /* wrr_low queues */
+ nr = g_submit_queues_wrr_low;
+ if (nr < 0 || nr > queues) {
+ pr_warn("null_blk: invalid wrr_low queue count\n");
+ return -EINVAL;
+ }
+ g_tagset_nr_maps++;
+ queues -= nr;
+ if (queues == 0)
+ goto wrr_low;
+
+ /* wrr_medium queues */
+ nr = g_submit_queues_wrr_medium;
+ if (nr < 0 || nr > queues) {
+ pr_warn("null_blk: invalid wrr_medium queue count\n");
+ return -EINVAL;
+ }
+ g_tagset_nr_maps++;
+ queues -= nr;
+ if (queues == 0)
+ goto wrr_medium;
+
+ /* wrr_high queues */
+ nr = g_submit_queues_wrr_high;
+ if (nr < 0 || nr > queues) {
+ pr_warn("null_blk: invalid wrr_medium queue count\n");
+ return -EINVAL;
+ }
+ g_tagset_nr_maps++;
+ queues -= nr;
+
+ /* add all remaining queues to the default group */
+ g_submit_queues_default += queues;
+
+ goto out;
+
+def:
+ g_submit_queues_read = 0;
+read:
+ g_submit_queues_poll = 0;
+poll:
+ g_submit_queues_wrr_low = 0;
+wrr_low:
+ g_submit_queues_wrr_medium = 0;
+wrr_medium:
+ g_submit_queues_wrr_high = 0;
+out:
+ pr_info("null_blk: total submit queues:%d, nr_map:%d, default:%d, "
+ "read:%d, poll:%d, wrr_low:%d, wrr_medium:%d wrr_high:%d\n",
+ g_submit_queues, g_tagset_nr_maps, g_submit_queues_default,
+ g_submit_queues_read, g_submit_queues_poll,
+ g_submit_queues_wrr_low, g_submit_queues_wrr_medium,
+ g_submit_queues_wrr_high);
+
+ return 0;
+}
+
static int __init null_init(void)
{
int ret = 0;
@@ -1758,16 +2031,11 @@ static int __init null_init(void)
pr_err("null_blk: legacy IO path no longer available\n");
return -EINVAL;
}
- if (g_queue_mode == NULL_Q_MQ && g_use_per_node_hctx) {
- if (g_submit_queues != nr_online_nodes) {
- pr_warn("null_blk: submit_queues param is set to %u.\n",
- nr_online_nodes);
- g_submit_queues = nr_online_nodes;
- }
- } else if (g_submit_queues > nr_cpu_ids)
- g_submit_queues = nr_cpu_ids;
- else if (g_submit_queues <= 0)
- g_submit_queues = 1;
+
+ if (null_verify_queues()) {
+ pr_err("null_blk: invalid submit queue parameter\n");
+ return -EINVAL;
+ }
if (g_queue_mode == NULL_Q_MQ && shared_tags) {
ret = null_init_tag_set(NULL, &tag_set);
--
2.14.1