回复: Question on io-wq
发件人: Zhang, Qiang 发送时间: 2020年10月23日 11:55 收件人: Jens Axboe 抄送: v...@zeniv.linux.org.uk; io-ur...@vger.kernel.org; linux-kernel@vger.kernel.org; linux-fsde...@vger.kernel.org 主题: 回复: Question on io-wq 发件人: Jens Axboe 发送时间: 2020年10月22日 22:08 收件人: Zhang, Qiang 抄送: v...@zeniv.linux.org.uk; io-ur...@vger.kernel.org; linux-kernel@vger.kernel.org; linux-fsde...@vger.kernel.org 主题: Re: Question on io-wq On 10/22/20 3:02 AM, Zhang,Qiang wrote: > > Hi Jens Axboe > > There are some problem in 'io_wqe_worker' thread, when the > 'io_wqe_worker' be create and Setting the affinity of CPUs in NUMA > nodes, due to CPU hotplug, When the last CPU going down, the > 'io_wqe_worker' thread will run anywhere. when the CPU in the node goes > online again, we should restore their cpu bindings? >Something like the below should help in ensuring affinities are >always correct - trigger an affinity set for an online CPU event. We >should not need to do it for offlining. Can you test it? >diff --git a/fs/io-wq.c b/fs/io-wq.c >index 4012ff541b7b..3bf029d1170e 100644 >--- a/fs/io-wq.c >+++ b/fs/io-wq.c >@@ -19,6 +19,7 @@ >#include >#include >#include >+#include >#include "io-wq.h" > >@@ -123,9 +124,13 @@ struct io_wq { > refcount_t refs; > struct completion done; > >+ struct hlist_node cpuhp_node; >+ > refcount_t use_refs; >}; > >+static enum cpuhp_state io_wq_online; >+ >static bool io_worker_get(struct io_worker *worker) >{ > return refcount_inc_not_zero(>ref); >@@ -1096,6 +1101,13 @@ struct io_wq *io_wq_create(unsigned bounded, >struct >io_wq_data *data) > return ERR_PTR(-ENOMEM); > } > >+ ret = cpuhp_state_add_instance_nocalls(io_wq_online, >>cpuhp_node); >+ if (ret) { >+ kfree(wq->wqes); >+ kfree(wq); >+ return ERR_PTR(ret); >+ } >+ >wq->free_work = data->free_work; >wq->do_work = data->do_work; > >@@ -1145,6 +1157,7 @@ struct io_wq *io_wq_create(unsigned bounded, >struct >io_wq_data *data) > ret = PTR_ERR(wq->manager); > complete(>done); >err: >+ 
cpuhp_state_remove_instance_nocalls(io_wq_online, >>cpuhp_node); > for_each_node(node) > kfree(wq->wqes[node]); > kfree(wq->wqes); >@@ -1164,6 +1177,8 @@ static void __io_wq_destroy(struct io_wq *wq) >{ > int node; > >+ cpuhp_state_remove_instance_nocalls(io_wq_online, >>cpuhp_node); >+ > set_bit(IO_WQ_BIT_EXIT, >state); > if (wq->manager) > kthread_stop(wq->manager); >@@ -1191,3 +1206,40 @@ struct task_struct *io_wq_get_task(struct io_wq >*wq) >{ > return wq->manager; >} >+ >+static bool io_wq_worker_affinity(struct io_worker *worker, void *data) >+{ >+ struct task_struct *task = worker->task; >+ unsigned long flags; >+ struct rq_flags rf; struct rq *rq; rq = task_rq_lock(task, ); --- raw_spin_lock_irqsave(>pi_lock, flags); >+ do_set_cpus_allowed(task, cpumask_of_node(worker->wqe->node)); >+ task->flags |= PF_NO_SETAFFINITY; --- raw_spin_unlock_irqrestore(>pi_lock, flags); task_rq_unlock(rq, task, ); >+ return false; >+} >+ >+static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node) >+{ >+ struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node); >+ int i; >+ >+ rcu_read_lock(); >+ for_each_node(i) >+ io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, >>NULL); >+ rcu_read_unlock(); >+ return 0; >+} >+ >+static __init int io_wq_init(void) >+{ >+ int ret; >+ >+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, >"io->wq/online", >+ io_wq_cpu_online, NULL); >+ if (ret < 0) >+ return ret; >+ io_wq_online = ret; >+ return 0; >+} >+subsys_initcall(io_wq_init); > >-- >Jens Axboe
回复: Question on io-wq
发件人: Jens Axboe 发送时间: 2020年10月22日 22:08 收件人: Zhang, Qiang 抄送: v...@zeniv.linux.org.uk; io-ur...@vger.kernel.org; linux-kernel@vger.kernel.org; linux-fsde...@vger.kernel.org 主题: Re: Question on io-wq On 10/22/20 3:02 AM, Zhang,Qiang wrote: > > Hi Jens Axboe > > There are some problem in 'io_wqe_worker' thread, when the > 'io_wqe_worker' be create and Setting the affinity of CPUs in NUMA > nodes, due to CPU hotplug, When the last CPU going down, the > 'io_wqe_worker' thread will run anywhere. when the CPU in the node goes > online again, we should restore their cpu bindings? >Something like the below should help in ensuring affinities are >always correct - trigger an affinity set for an online CPU event. We >should not need to do it for offlining. Can you test it? >diff --git a/fs/io-wq.c b/fs/io-wq.c >index 4012ff541b7b..3bf029d1170e 100644 >--- a/fs/io-wq.c >+++ b/fs/io-wq.c >@@ -19,6 +19,7 @@ >#include >#include >#include >+#include >#include "io-wq.h" > >@@ -123,9 +124,13 @@ struct io_wq { > refcount_t refs; > struct completion done; > >+ struct hlist_node cpuhp_node; >+ > refcount_t use_refs; >}; > >+static enum cpuhp_state io_wq_online; >+ >static bool io_worker_get(struct io_worker *worker) >{ > return refcount_inc_not_zero(>ref); >@@ -1096,6 +1101,13 @@ struct io_wq *io_wq_create(unsigned bounded, >struct >io_wq_data *data) > return ERR_PTR(-ENOMEM); > } > >+ ret = cpuhp_state_add_instance_nocalls(io_wq_online, >>cpuhp_node); >+ if (ret) { >+ kfree(wq->wqes); >+ kfree(wq); >+ return ERR_PTR(ret); >+ } >+ >wq->free_work = data->free_work; >wq->do_work = data->do_work; > >@@ -1145,6 +1157,7 @@ struct io_wq *io_wq_create(unsigned bounded, >struct >io_wq_data *data) > ret = PTR_ERR(wq->manager); > complete(>done); >err: >+ cpuhp_state_remove_instance_nocalls(io_wq_online, >>cpuhp_node); > for_each_node(node) > kfree(wq->wqes[node]); > kfree(wq->wqes); >@@ -1164,6 +1177,8 @@ static void __io_wq_destroy(struct io_wq *wq) >{ > int node; > >+ 
cpuhp_state_remove_instance_nocalls(io_wq_online, >>cpuhp_node); >+ > set_bit(IO_WQ_BIT_EXIT, >state); > if (wq->manager) > kthread_stop(wq->manager); >@@ -1191,3 +1206,40 @@ struct task_struct *io_wq_get_task(struct io_wq >*wq) >{ > return wq->manager; >} >+ >+static bool io_wq_worker_affinity(struct io_worker *worker, void *data) >+{ >+ struct task_struct *task = worker->task; >+ unsigned long flags; >+ struct rq_flags rf; >+ raw_spin_lock_irqsave(>pi_lock, flags); >+ do_set_cpus_allowed(task, cpumask_of_node(worker->wqe->node)); >+ task->flags |= PF_NO_SETAFFINITY; >+ raw_spin_unlock_irqrestore(>pi_lock, flags); >+ return false; >+} >+ >+static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node) >+{ >+ struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node); >+ int i; >+ >+ rcu_read_lock(); >+ for_each_node(i) >+ io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, >>NULL); >+ rcu_read_unlock(); >+ return 0; >+} >+ >+static __init int io_wq_init(void) >+{ >+ int ret; >+ >+ ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, >"io->wq/online", >+ io_wq_cpu_online, NULL); >+ if (ret < 0) >+ return ret; >+ io_wq_online = ret; >+ return 0; >+} >+subsys_initcall(io_wq_init); > >-- >Jens Axboe
Re: Question on io-wq
On 10/22/20 8:05 PM, Hillf Danton wrote: > On Thu, 22 Oct 2020 08:08:09 -0600 Jens Axboe wrote: >> On 10/22/20 3:02 AM, Zhang,Qiang wrote: >>> >>> Hi Jens Axboe >>> >>> There are some problem in 'io_wqe_worker' thread, when the >>> 'io_wqe_worker' be create and Setting the affinity of CPUs in NUMA >>> nodes, due to CPU hotplug, When the last CPU going down, the >>> 'io_wqe_worker' thread will run anywhere. when the CPU in the node goes >>> online again, we should restore their cpu bindings? >> >> Something like the below should help in ensuring affinities are >> always correct - trigger an affinity set for an online CPU event. We >> should not need to do it for offlining. Can you test it? > > CPU affinity is intact because of nothing to do on offline, and scheduler > will move the stray workers on to the correct NUMA node if any CPU goes > online, so it's a bit hard to see what is going to be tested. Test it yourself: - Boot with > 1 NUMA node - Start an io_uring, you now get 2 workers, each affinitized to a node - Now offline all CPUs in one node - Online one or more of the CPU in that same node The end result is that the worker on the node that was offlined now has a mask of the other node, plus the newly added CPU. So your last statement isn't correct, which is what the original reporter stated. -- Jens Axboe
Re: Question on io-wq
On 10/22/20 3:02 AM, Zhang,Qiang wrote: > > Hi Jens Axboe > > There are some problem in 'io_wqe_worker' thread, when the > 'io_wqe_worker' be create and Setting the affinity of CPUs in NUMA > nodes, due to CPU hotplug, When the last CPU going down, the > 'io_wqe_worker' thread will run anywhere. when the CPU in the node goes > online again, we should restore their cpu bindings? Something like the below should help in ensuring affinities are always correct - trigger an affinity set for an online CPU event. We should not need to do it for offlining. Can you test it? diff --git a/fs/io-wq.c b/fs/io-wq.c index 4012ff541b7b..3bf029d1170e 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "io-wq.h" @@ -123,9 +124,13 @@ struct io_wq { refcount_t refs; struct completion done; + struct hlist_node cpuhp_node; + refcount_t use_refs; }; +static enum cpuhp_state io_wq_online; + static bool io_worker_get(struct io_worker *worker) { return refcount_inc_not_zero(>ref); @@ -1096,6 +1101,13 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) return ERR_PTR(-ENOMEM); } + ret = cpuhp_state_add_instance_nocalls(io_wq_online, >cpuhp_node); + if (ret) { + kfree(wq->wqes); + kfree(wq); + return ERR_PTR(ret); + } + wq->free_work = data->free_work; wq->do_work = data->do_work; @@ -1145,6 +1157,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) ret = PTR_ERR(wq->manager); complete(>done); err: + cpuhp_state_remove_instance_nocalls(io_wq_online, >cpuhp_node); for_each_node(node) kfree(wq->wqes[node]); kfree(wq->wqes); @@ -1164,6 +1177,8 @@ static void __io_wq_destroy(struct io_wq *wq) { int node; + cpuhp_state_remove_instance_nocalls(io_wq_online, >cpuhp_node); + set_bit(IO_WQ_BIT_EXIT, >state); if (wq->manager) kthread_stop(wq->manager); @@ -1191,3 +1206,40 @@ struct task_struct *io_wq_get_task(struct io_wq *wq) { return wq->manager; } + +static bool io_wq_worker_affinity(struct io_worker 
*worker, void *data) +{ + struct task_struct *task = worker->task; + unsigned long flags; + + raw_spin_lock_irqsave(&task->pi_lock, flags); + do_set_cpus_allowed(task, cpumask_of_node(worker->wqe->node)); + task->flags |= PF_NO_SETAFFINITY; + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + return false; +} + +static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node) +{ + struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node); + int i; + + rcu_read_lock(); + for_each_node(i) + io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, NULL); + rcu_read_unlock(); + return 0; +} + +static __init int io_wq_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online", + io_wq_cpu_online, NULL); + if (ret < 0) + return ret; + io_wq_online = ret; + return 0; +} +subsys_initcall(io_wq_init); -- Jens Axboe
Question on io-wq
Hi Jens Axboe There is a problem with the 'io_wqe_worker' thread: when the 'io_wqe_worker' is created, its CPU affinity is set to the CPUs of its NUMA node. Due to CPU hotplug, when the last CPU in that node goes down, the 'io_wqe_worker' thread will run anywhere. When a CPU in the node goes online again, should we restore their CPU bindings? Thanks Qiang
Question on io-wq
Hi Jens Axboe There is a problem with the 'io_wqe_worker' thread: when the 'io_wqe_worker' is created, its CPU affinity is set to the CPUs of its NUMA node. Due to CPU hotplug, when the last CPU in that node goes down, the 'io_wqe_worker' thread will run anywhere. When a CPU in the node goes online again, should we restore their CPU bindings? Thanks Qiang