> On 14 Mar 2019, at 07.22, Javier González <[email protected]> wrote:
> 
>> On 14 Mar 2019, at 07.17, Marcin Dziegielewski 
>> <[email protected]> wrote:
>> 
>> In some cases write thread migration between cpus can cause
>> sending writes in improper order and in consequence a lot of
>> errors from device.
>> 
>> Write thread affinity to particular cpu prevent before it.
>> 
>> Signed-off-by: Marcin Dziegielewski <[email protected]>
>> ---
>> drivers/lightnvm/pblk-init.c | 30 +++++++++++++++++++++++++++++-
>> drivers/lightnvm/pblk.h      |  1 +
>> drivers/nvme/host/lightnvm.c |  1 +
>> include/linux/lightnvm.h     |  2 ++
>> 4 files changed, 33 insertions(+), 1 deletion(-)
>> 
>> diff --git a/drivers/lightnvm/pblk-init.c b/drivers/lightnvm/pblk-init.c
>> index 81e8ed4..bd25004 100644
>> --- a/drivers/lightnvm/pblk-init.c
>> +++ b/drivers/lightnvm/pblk-init.c
>> @@ -21,6 +21,8 @@
>> 
>> #include "pblk.h"
>> #include "pblk-trace.h"
>> +#include <linux/cpumask.h>
>> +#include <linux/numa.h>
>> 
>> static unsigned int write_buffer_size;
>> 
>> @@ -47,6 +49,8 @@ struct pblk_global_caches {
>> 
>> struct bio_set pblk_bio_set;
>> 
>> +cpumask_t free_cpumask;
>> +
>> static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
>>                        struct bio *bio)
>> {
>> @@ -1098,6 +1102,9 @@ static int pblk_lines_init(struct pblk *pblk)
>> 
>> static int pblk_writer_init(struct pblk *pblk)
>> {
>> +    cpumask_t tmp_cpumask, cpumask;
>> +    int cpu;
>> +
>>      pblk->writer_ts = kthread_create(pblk_write_ts, pblk, "pblk-writer-t");
>>      if (IS_ERR(pblk->writer_ts)) {
>>              int err = PTR_ERR(pblk->writer_ts);
>> @@ -1108,6 +1115,22 @@ static int pblk_writer_init(struct pblk *pblk)
>>              return err;
>>      }
>> 
>> +    cpumask_and(&tmp_cpumask, cpumask_of_node(pblk->dev->parent->node),
>> +                    cpu_online_mask);
>> +    cpumask_and(&cpumask, &tmp_cpumask, &free_cpumask);
>> +
>> +    if (!cpumask_weight(&free_cpumask)) {
>> +            free_cpumask = CPU_MASK_ALL;
>> +            cpumask_and(&cpumask, &tmp_cpumask, &free_cpumask);
>> +    }
>> +
>> +    cpu = cpumask_last(&cpumask);
>> +
>> +    kthread_bind(pblk->writer_ts, cpu);
>> +
>> +    cpumask_clear_cpu(cpu, &free_cpumask);
>> +    pblk->writer_cpu = cpu;
>> +
>>      timer_setup(&pblk->wtimer, pblk_write_timer_fn, 0);
>>      mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(100));
>> 
>> @@ -1126,8 +1149,11 @@ static void pblk_writer_stop(struct pblk *pblk)
>>                      "Stopping not fully synced write buffer\n");
>> 
>>      del_timer_sync(&pblk->wtimer);
>> -    if (pblk->writer_ts)
>> +    if (pblk->writer_ts) {
>> +            set_cpus_allowed_ptr(pblk->writer_ts, cpu_online_mask);
>>              kthread_stop(pblk->writer_ts);
>> +            cpumask_set_cpu(pblk->writer_cpu, &free_cpumask);
>> +    }
>> }
>> 
>> static void pblk_free(struct pblk *pblk)
>> @@ -1328,6 +1354,8 @@ static int __init pblk_module_init(void)
>> {
>>      int ret;
>> 
>> +    free_cpumask = CPU_MASK_ALL;
>> +
>>      ret = bioset_init(&pblk_bio_set, BIO_POOL_SIZE, 0, 0);
>>      if (ret)
>>              return ret;
>> diff --git a/drivers/lightnvm/pblk.h b/drivers/lightnvm/pblk.h
>> index 381f074..650f983 100644
>> --- a/drivers/lightnvm/pblk.h
>> +++ b/drivers/lightnvm/pblk.h
>> @@ -690,6 +690,7 @@ struct pblk {
>>      atomic_t inflight_io;           /* General inflight I/O counter */
>> 
>>      struct task_struct *writer_ts;
>> +    int writer_cpu;
>> 
>>      /* Simple translation map of logical addresses to physical addresses.
>>       * The logical addresses is known by the host system, while the physical
>> diff --git a/drivers/nvme/host/lightnvm.c b/drivers/nvme/host/lightnvm.c
>> index 949e29e..971a19f 100644
>> --- a/drivers/nvme/host/lightnvm.c
>> +++ b/drivers/nvme/host/lightnvm.c
>> @@ -982,6 +982,7 @@ int nvme_nvm_register(struct nvme_ns *ns, char 
>> *disk_name, int node)
>>      memcpy(dev->name, disk_name, DISK_NAME_LEN);
>>      dev->ops = &nvme_nvm_dev_ops;
>>      dev->private_data = ns;
>> +    dev->node = node;
>>      ns->ndev = dev;
>> 
>>      return nvm_register(dev);
>> diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h
>> index 5d865a5..312029e 100644
>> --- a/include/linux/lightnvm.h
>> +++ b/include/linux/lightnvm.h
>> @@ -427,6 +427,8 @@ struct nvm_dev {
>>      char name[DISK_NAME_LEN];
>>      void *private_data;
>> 
>> +    int node;
>> +
>>      void *rmap;
>> 
>>      struct mutex mlock;
>> --
>> 1.8.3.1
> 
> We have a per-CPU semaphore that only allows to send a single I/O in
> order to prevent write pointer violations. Are you seeing this error, or
> is it theoretical?
> 
> Javier

I meant per PU (parallel unit)…

Attachment: signature.asc
Description: Message signed with OpenPGP

Reply via email to