回复: Question on io-wq

2020-10-22 Thread Zhang, Qiang



发件人: Zhang, Qiang 
发送时间: 2020年10月23日 11:55
收件人: Jens Axboe
抄送: v...@zeniv.linux.org.uk; io-ur...@vger.kernel.org; 
linux-kernel@vger.kernel.org; linux-fsde...@vger.kernel.org
主题: 回复: Question on io-wq




发件人: Jens Axboe 
发送时间: 2020年10月22日 22:08
收件人: Zhang, Qiang
抄送: v...@zeniv.linux.org.uk; io-ur...@vger.kernel.org; 
linux-kernel@vger.kernel.org; linux-fsde...@vger.kernel.org
主题: Re: Question on io-wq

On 10/22/20 3:02 AM, Zhang,Qiang wrote:
>
> Hi Jens Axboe
>
> There are some problem in 'io_wqe_worker' thread, when the
> 'io_wqe_worker' be create and  Setting the affinity of CPUs in NUMA
> nodes, due to CPU hotplug, When the last CPU going down, the
> 'io_wqe_worker' thread will run anywhere. when the CPU in the node goes
> online again, we should restore their cpu bindings?

>Something like the below should help in ensuring affinities are
>always correct - trigger an affinity set for an online CPU event. We
>should not need to do it for offlining. Can you test it?


>diff --git a/fs/io-wq.c b/fs/io-wq.c
>index 4012ff541b7b..3bf029d1170e 100644
>--- a/fs/io-wq.c
>+++ b/fs/io-wq.c
>@@ -19,6 +19,7 @@
 >#include 
 >#include 
 >#include 
>+#include <linux/cpu.h>

 >#include "io-wq.h"
>
>@@ -123,9 +124,13 @@ struct io_wq {
 >   refcount_t refs;
  >  struct completion done;
>
>+   struct hlist_node cpuhp_node;
>+
 >   refcount_t use_refs;
 >};
>
>+static enum cpuhp_state io_wq_online;
>+
 >static bool io_worker_get(struct io_worker *worker)
 >{
   > return refcount_inc_not_zero(>ref);
>@@ -1096,6 +1101,13 @@ struct io_wq *io_wq_create(unsigned bounded, >struct 
>io_wq_data *data)
 >   return ERR_PTR(-ENOMEM);
  >  }
>
>+   ret = cpuhp_state_add_instance_nocalls(io_wq_online, >>cpuhp_node);
>+   if (ret) {
>+   kfree(wq->wqes);
>+   kfree(wq);
>+   return ERR_PTR(ret);
>+   }
>+
>wq->free_work = data->free_work;
>wq->do_work = data->do_work;
>
>@@ -1145,6 +1157,7 @@ struct io_wq *io_wq_create(unsigned bounded, >struct 
>io_wq_data *data)
 >   ret = PTR_ERR(wq->manager);
 >   complete(>done);
 >err:
>+   cpuhp_state_remove_instance_nocalls(io_wq_online, >>cpuhp_node);
  >  for_each_node(node)
 >   kfree(wq->wqes[node]);
 >   kfree(wq->wqes);
>@@ -1164,6 +1177,8 @@ static void __io_wq_destroy(struct io_wq *wq)
 >{
 >   int node;
>
>+   cpuhp_state_remove_instance_nocalls(io_wq_online, >>cpuhp_node);
>+
   > set_bit(IO_WQ_BIT_EXIT, >state);
  >  if (wq->manager)
 >   kthread_stop(wq->manager);
>@@ -1191,3 +1206,40 @@ struct task_struct *io_wq_get_task(struct io_wq >*wq)
 >{
 >  return wq->manager;
 >}
>+
>+static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
>+{
>+   struct task_struct *task = worker->task;
>+   unsigned long flags;
>+
   struct rq_flags rf;
   struct rq *rq;
   rq = task_rq_lock(task, &rf);

---   raw_spin_lock_irqsave(&task->pi_lock, flags);
>+   do_set_cpus_allowed(task, cpumask_of_node(worker->wqe->node));
>+   task->flags |= PF_NO_SETAFFINITY;
---  raw_spin_unlock_irqrestore(&task->pi_lock, flags);

  task_rq_unlock(rq, task, &rf);

>+   return false;
>+}
>+
>+static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
>+{
>+   struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
>+   int i;
>+
>+   rcu_read_lock();
>+   for_each_node(i)
>+   io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, 
>>NULL);
>+   rcu_read_unlock();
>+   return 0;
>+}
>+
>+static __init int io_wq_init(void)
>+{
>+   int ret;
>+
>+   ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
>+   io_wq_cpu_online, NULL);
>+   if (ret < 0)
>+   return ret;
>+   io_wq_online = ret;
>+   return 0;
>+}
>+subsys_initcall(io_wq_init);
>
>--
>Jens Axboe



回复: Question on io-wq

2020-10-22 Thread Zhang, Qiang



发件人: Jens Axboe 
发送时间: 2020年10月22日 22:08
收件人: Zhang, Qiang
抄送: v...@zeniv.linux.org.uk; io-ur...@vger.kernel.org; 
linux-kernel@vger.kernel.org; linux-fsde...@vger.kernel.org
主题: Re: Question on io-wq

On 10/22/20 3:02 AM, Zhang,Qiang wrote:
>
> Hi Jens Axboe
>
> There are some problem in 'io_wqe_worker' thread, when the
> 'io_wqe_worker' be create and  Setting the affinity of CPUs in NUMA
> nodes, due to CPU hotplug, When the last CPU going down, the
> 'io_wqe_worker' thread will run anywhere. when the CPU in the node goes
> online again, we should restore their cpu bindings?

>Something like the below should help in ensuring affinities are
>always correct - trigger an affinity set for an online CPU event. We
>should not need to do it for offlining. Can you test it?


>diff --git a/fs/io-wq.c b/fs/io-wq.c
>index 4012ff541b7b..3bf029d1170e 100644
>--- a/fs/io-wq.c
>+++ b/fs/io-wq.c
>@@ -19,6 +19,7 @@
 >#include 
 >#include 
 >#include 
>+#include <linux/cpu.h>

 >#include "io-wq.h"
>
>@@ -123,9 +124,13 @@ struct io_wq {
 >   refcount_t refs;
  >  struct completion done;
>
>+   struct hlist_node cpuhp_node;
>+
 >   refcount_t use_refs;
 >};
>
>+static enum cpuhp_state io_wq_online;
>+
 >static bool io_worker_get(struct io_worker *worker)
 >{
   > return refcount_inc_not_zero(>ref);
>@@ -1096,6 +1101,13 @@ struct io_wq *io_wq_create(unsigned bounded, >struct 
>io_wq_data *data)
 >   return ERR_PTR(-ENOMEM);
  >  }
>
>+   ret = cpuhp_state_add_instance_nocalls(io_wq_online, >>cpuhp_node);
>+   if (ret) {
>+   kfree(wq->wqes);
>+   kfree(wq);
>+   return ERR_PTR(ret);
>+   }
>+
>wq->free_work = data->free_work;
>wq->do_work = data->do_work;
>
>@@ -1145,6 +1157,7 @@ struct io_wq *io_wq_create(unsigned bounded, >struct 
>io_wq_data *data)
 >   ret = PTR_ERR(wq->manager);
 >   complete(>done);
 >err:
>+   cpuhp_state_remove_instance_nocalls(io_wq_online, >>cpuhp_node);
  >  for_each_node(node)
 >   kfree(wq->wqes[node]);
 >   kfree(wq->wqes);
>@@ -1164,6 +1177,8 @@ static void __io_wq_destroy(struct io_wq *wq)
 >{
 >   int node;
>
>+   cpuhp_state_remove_instance_nocalls(io_wq_online, >>cpuhp_node);
>+
   > set_bit(IO_WQ_BIT_EXIT, >state);
  >  if (wq->manager)
 >   kthread_stop(wq->manager);
>@@ -1191,3 +1206,40 @@ struct task_struct *io_wq_get_task(struct io_wq >*wq)
 >{
 >  return wq->manager;
 >}
>+
>+static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
>+{
>+   struct task_struct *task = worker->task;
>+   unsigned long flags;
>+
    struct rq_flags rf;


>+   raw_spin_lock_irqsave(&task->pi_lock, flags);
>+   do_set_cpus_allowed(task, cpumask_of_node(worker->wqe->node));
>+   task->flags |= PF_NO_SETAFFINITY;
>+   raw_spin_unlock_irqrestore(&task->pi_lock, flags);


>+   return false;
>+}
>+
>+static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
>+{
>+   struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
>+   int i;
>+
>+   rcu_read_lock();
>+   for_each_node(i)
>+   io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, 
>>NULL);
>+   rcu_read_unlock();
>+   return 0;
>+}
>+
>+static __init int io_wq_init(void)
>+{
>+   int ret;
>+
>+   ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
>+   io_wq_cpu_online, NULL);
>+   if (ret < 0)
>+   return ret;
>+   io_wq_online = ret;
>+   return 0;
>+}
>+subsys_initcall(io_wq_init);
>
>--
>Jens Axboe



Re: Question on io-wq

2020-10-22 Thread Jens Axboe
On 10/22/20 8:05 PM, Hillf Danton wrote:
> On Thu, 22 Oct 2020 08:08:09 -0600 Jens Axboe wrote:
>> On 10/22/20 3:02 AM, Zhang,Qiang wrote:
>>>
>>> Hi Jens Axboe
>>>
>>> There are some problem in 'io_wqe_worker' thread, when the 
>>> 'io_wqe_worker' be create and  Setting the affinity of CPUs in NUMA 
>>> nodes, due to CPU hotplug, When the last CPU going down, the 
>>> 'io_wqe_worker' thread will run anywhere. when the CPU in the node goes 
>>> online again, we should restore their cpu bindings?
>>
>> Something like the below should help in ensuring affinities are
>> always correct - trigger an affinity set for an online CPU event. We
>> should not need to do it for offlining. Can you test it?
> 
> CPU affinity is intact because of nothing to do on offline, and scheduler
> will move the stray workers on to the correct NUMA node if any CPU goes
> online, so it's a bit hard to see what is going to be tested.

Test it yourself:

- Boot with > 1 NUMA node
- Start an io_uring, you now get 2 workers, each affinitized to a node
- Now offline all CPUs in one node
- Online one or more of the CPU in that same node

The end result is that the worker on the node that was offlined now
has a mask of the other node, plus the newly added CPU.

So your last statement isn't correct, which is what the original
reporter stated.

-- 
Jens Axboe



Re: Question on io-wq

2020-10-22 Thread Jens Axboe
On 10/22/20 3:02 AM, Zhang,Qiang wrote:
> 
> Hi Jens Axboe
> 
> There are some problem in 'io_wqe_worker' thread, when the 
> 'io_wqe_worker' be create and  Setting the affinity of CPUs in NUMA 
> nodes, due to CPU hotplug, When the last CPU going down, the 
> 'io_wqe_worker' thread will run anywhere. when the CPU in the node goes 
> online again, we should restore their cpu bindings?

Something like the below should help in ensuring affinities are
always correct - trigger an affinity set for an online CPU event. We
should not need to do it for offlining. Can you test it?


diff --git a/fs/io-wq.c b/fs/io-wq.c
index 4012ff541b7b..3bf029d1170e 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include <linux/cpu.h>
 
 #include "io-wq.h"
 
@@ -123,9 +124,13 @@ struct io_wq {
refcount_t refs;
struct completion done;
 
+   struct hlist_node cpuhp_node;
+
refcount_t use_refs;
 };
 
+static enum cpuhp_state io_wq_online;
+
 static bool io_worker_get(struct io_worker *worker)
 {
	return refcount_inc_not_zero(&worker->ref);
@@ -1096,6 +1101,13 @@ struct io_wq *io_wq_create(unsigned bounded, struct 
io_wq_data *data)
return ERR_PTR(-ENOMEM);
}
 
+   ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node);
+   if (ret) {
+   kfree(wq->wqes);
+   kfree(wq);
+   return ERR_PTR(ret);
+   }
+
wq->free_work = data->free_work;
wq->do_work = data->do_work;
 
@@ -1145,6 +1157,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct 
io_wq_data *data)
ret = PTR_ERR(wq->manager);
	complete(&wq->done);
 err:
+   cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
for_each_node(node)
kfree(wq->wqes[node]);
kfree(wq->wqes);
@@ -1164,6 +1177,8 @@ static void __io_wq_destroy(struct io_wq *wq)
 {
int node;
 
+   cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
+
	set_bit(IO_WQ_BIT_EXIT, &wq->state);
if (wq->manager)
kthread_stop(wq->manager);
@@ -1191,3 +1206,40 @@ struct task_struct *io_wq_get_task(struct io_wq *wq)
 {
return wq->manager;
 }
+
+static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
+{
+   struct task_struct *task = worker->task;
+   unsigned long flags;
+
+   raw_spin_lock_irqsave(&task->pi_lock, flags);
+   do_set_cpus_allowed(task, cpumask_of_node(worker->wqe->node));
+   task->flags |= PF_NO_SETAFFINITY;
+   raw_spin_unlock_irqrestore(&task->pi_lock, flags);
+   return false;
+}
+
+static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+   struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node);
+   int i;
+
+   rcu_read_lock();
+   for_each_node(i)
+   io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, NULL);
+   rcu_read_unlock();
+   return 0;
+}
+
+static __init int io_wq_init(void)
+{
+   int ret;
+
+   ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online",
+   io_wq_cpu_online, NULL);
+   if (ret < 0)
+   return ret;
+   io_wq_online = ret;
+   return 0;
+}
+subsys_initcall(io_wq_init);

-- 
Jens Axboe



Question on io-wq

2020-10-22 Thread Zhang, Qiang

Hi Jens Axboe

There is a problem with the 'io_wqe_worker' threads: when an 
'io_wqe_worker' is created, its CPU affinity is set to the CPUs of its 
NUMA node. Due to CPU hotplug, when the last CPU in that node goes 
down, the 'io_wqe_worker' thread can run anywhere. When a CPU in the 
node goes online again, should we restore its CPU binding?

Thanks
Qiang



Question on io-wq

2020-10-22 Thread Zhang,Qiang



Hi Jens Axboe

There is a problem with the 'io_wqe_worker' threads: when an 
'io_wqe_worker' is created, its CPU affinity is set to the CPUs of its 
NUMA node. Due to CPU hotplug, when the last CPU in that node goes 
down, the 'io_wqe_worker' thread can run anywhere. When a CPU in the 
node goes online again, should we restore its CPU binding?


Thanks
Qiang