RE: [RFC PATCH v2 3/7] dmaengine: enhance dmaengine to support DMA device hotplug

2013-08-08 Thread Wang, Rui Y
> -Original Message-
> From: Jon Mason [mailto:jdma...@kudzu.us]
> Sent: Friday, August 09, 2013 8:04 AM
> To: Wang, Rui Y
> Cc: liu...@gmail.com; Sosnowski, Maciej; Koul, Vinod;
> chenkep...@huawei.com; linux-kernel@vger.kernel.org;
> linux-...@vger.kernel.org; Luck, Tony; Guo, Chaohong; Dan Williams; Jiang,
> Dave
> Subject: Re: [RFC PATCH v2 3/7] dmaengine: enhance dmaengine to support
> DMA device hotplug
> 
> On Thu, Aug 8, 2013 at 3:59 AM, Wang, Rui Y  wrote:
> > (resend adding cc list)
> 
> The e-mail you are responding to is over a year old, but doesn't appear to
> have been accepted.  I suppose late is better than never...
> 

Yes, agreed. We eventually have to fix it.

I recently encountered the same problem (dma_async_device_unregister() hung my 
machine). I was looking for people who cared about it and found this thread.

Thanks
Rui

> Adding Dan Williams new e-mail address and Dave Jiang.
> 
> Thanks,
> Jon
> 
> >
> > Hi Jiang,
> > See my comments inline.
> >
> >> -Original Message-
> >> From: Jiang Liu 
> >> Date: Mon, 14 May 2012 21:47:05 +0800
> >> Subject: [RFC PATCH v2 3/7] dmaengine: enhance dmaengine to support
> >> DMA device hotplug
> >> To: Dan Williams , Maciej Sosnowski
> >> , Vinod Koul 
> >> Cc: Jiang Liu , Keping Chen
> >> , linux-kernel@vger.kernel.org,
> >> linux-...@vger.kernel.org
> >>
> >> From: Jiang Liu 
> >>
> >> From: Jiang Liu 
> >>
> >> To support DMA device hotplug, dmaengine must correctly manage
> >> reference count for DMA channels. On the other hand, DMA hot path is
> >> performance critical, reference count management code should avoid
> >> polluting global shared cachelines, otherwise it may cause heavy
> >> performance penalty.
> >>
> >> This patch introduces a lightweight DMA channel reference count
> >> management mechanism by using percpu counter. When DMA device hotplug
> >> is disabled, there's no performance penalty. When hotplug is enabled,
> >> a dma_find_channel()/dma_put_channel() pair adds two local memory
> >> accesses to the hot path.
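
For illustration, the dma_find_channel()/dma_put_channel() pairing described
above would look roughly like the sketch below. This is only an illustrative
sketch, not part of the patch: dma_put_channel() is not visible in the quoted
hunks, so the decrement used here is an assumption based on the description
and on the error path that net_dma_find_channel() takes further down.

#include <linux/dmaengine.h>
#include <linux/percpu.h>

/* Assumed to be provided by the (unquoted) dmaengine.h part of the patch. */
DECLARE_PER_CPU(long, dmaengine_chan_ref_count);

static int dma_memcpy_user_sketch(void)
{
        struct dma_chan *chan;

        /* Bumps this CPU's dmaengine_chan_ref_count, then checks the
         * dmaengine_quiesce static key. */
        chan = dma_find_channel(DMA_MEMCPY);
        if (!chan) {
                /* Mirrors net_dma_find_channel(): drop the reference taken
                 * above when no usable channel is returned. */
                this_cpu_dec(dmaengine_chan_ref_count);
                return -ENODEV;
        }

        /* ... build and submit a DMA_MEMCPY descriptor on chan ... */

        /* Assumed body of dma_put_channel(): a purely CPU-local decrement,
         * so no globally shared cacheline is touched on the hot path. */
        this_cpu_dec(dmaengine_chan_ref_count);
        return 0;
}
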
> >>
> >> Signed-off-by: Jiang Liu 
> >> ---
> >>  drivers/dma/dmaengine.c   |  112 +
> >>  include/linux/dmaengine.h |   29 +++-
> >>  2 files changed, 129 insertions(+), 12 deletions(-)
> >>
> >> diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
> >> index d3b1c48..eca45c0 100644
> >> --- a/drivers/dma/dmaengine.c
> >> +++ b/drivers/dma/dmaengine.c
> >> @@ -61,12 +61,20 @@
> >>  #include <linux/rculist.h>
> >>  #include <linux/idr.h>
> >>  #include <linux/slab.h>
> >> +#include <linux/sched.h>
> >>
> >>  static DEFINE_MUTEX(dma_list_mutex);
> >>  static DEFINE_IDR(dma_idr);
> >>  static LIST_HEAD(dma_device_list);
> >>  static long dmaengine_client_count;
> >>
> >> +#ifdef   CONFIG_DMA_ENGINE_HOTPLUG
> >> +static atomic_t dmaengine_dirty;
> >> +static struct static_key dmaengine_quiesce = STATIC_KEY_INIT_FALSE;
> >> +static DECLARE_WAIT_QUEUE_HEAD(dmaengine_wait_queue);
> >> +DEFINE_PER_CPU(long, dmaengine_chan_ref_count);
> >> +#endif   /* CONFIG_DMA_ENGINE_HOTPLUG */
> >> +
> >>  /* --- sysfs implementation --- */
> >>
> >>  /**
> >> @@ -305,10 +313,40 @@ arch_initcall(dma_channel_table_init);
> >>   */
> >>  struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type)
> >> {
> >> - return this_cpu_read(channel_table[tx_type]->chan);
> >> + struct dma_chan *chan = this_cpu_read(channel_table[tx_type]->chan);
> >> +
> >> +#ifdef   CONFIG_DMA_ENGINE_HOTPLUG
> >> + this_cpu_inc(dmaengine_chan_ref_count);
> >> + if (static_key_false(&dmaengine_quiesce))
> >> + chan = NULL;
> >> +#endif
> >> +
> >> + return chan;
> >>  }
> >>  EXPORT_SYMBOL(dma_find_channel);
> >>
> >> +#ifdef   CONFIG_DMA_ENGINE_HOTPLUG
> >> +struct dma_chan *dma_get_channel(struct dma_chan *chan)
> >> +{
> >> + if (static_key_false(&dmaengine_quiesce))
> >> + atomic_inc(&dmaengine_dirty);
> >> + this_cpu_inc(dmaengine_chan_ref_count);
> >> +
> >> + return chan;
> >> +}
> >> +EXPORT_SYMBOL(dma_get_channel);
> >> +#endif
> >> +
> >> +/**
> >> + * dma_has_capability 

Re: [RFC PATCH v2 3/7] dmaengine: enhance dmaengine to support DMA device hotplug

2013-08-08 Thread Jon Mason
On Thu, Aug 8, 2013 at 3:59 AM, Wang, Rui Y  wrote:
> (resend adding cc list)

The e-mail you are responding to is over a year old, but doesn't
appear to have been accepted.  I suppose late is better than never...

Adding Dan Williams new e-mail address and Dave Jiang.

Thanks,
Jon

>
> Hi Jiang,
> See my comments inline.
>
>> -Original Message-
>> From: Jiang Liu 
>> Date: Mon, 14 May 2012 21:47:05 +0800
>> Subject: [RFC PATCH v2 3/7] dmaengine: enhance dmaengine to support DMA
>> device hotplug
>> To: Dan Williams , Maciej Sosnowski
>> , Vinod Koul 
>> Cc: Jiang Liu , Keping Chen ,
>> linux-kernel@vger.kernel.org, linux-...@vger.kernel.org
>>
>> From: Jiang Liu 
>>
>> From: Jiang Liu 
>>
>> To support DMA device hotplug, dmaengine must correctly manage reference
>> count for DMA channels. On the other hand, DMA hot path is performance
>> critical, reference count management code should avoid polluting global
>> shared cachelines, otherwise it may cause heavy performance penalty.
>>
>> This patch introduces a lightweight DMA channel reference count management
>> mechanism by using percpu counter. When DMA device hotplug is disabled,
>> there's no performance penalty. When hotplug is enabled, a
>> dma_find_channel()/dma_put_channel() pair adds two local memory accesses
>> to the hot path.
>>
>> Signed-off-by: Jiang Liu 
>> ---
>>  drivers/dma/dmaengine.c   |  112 +
>>  include/linux/dmaengine.h |   29 +++-
>>  2 files changed, 129 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
>> index d3b1c48..eca45c0 100644
>> --- a/drivers/dma/dmaengine.c
>> +++ b/drivers/dma/dmaengine.c
>> @@ -61,12 +61,20 @@
>>  #include <linux/rculist.h>
>>  #include <linux/idr.h>
>>  #include <linux/slab.h>
>> +#include <linux/sched.h>
>>
>>  static DEFINE_MUTEX(dma_list_mutex);
>>  static DEFINE_IDR(dma_idr);
>>  static LIST_HEAD(dma_device_list);
>>  static long dmaengine_client_count;
>>
>> +#ifdef   CONFIG_DMA_ENGINE_HOTPLUG
>> +static atomic_t dmaengine_dirty;
>> +static struct static_key dmaengine_quiesce = STATIC_KEY_INIT_FALSE;
>> +static DECLARE_WAIT_QUEUE_HEAD(dmaengine_wait_queue);
>> +DEFINE_PER_CPU(long, dmaengine_chan_ref_count);
>> +#endif   /* CONFIG_DMA_ENGINE_HOTPLUG */
>> +
>>  /* --- sysfs implementation --- */
>>
>>  /**
>> @@ -305,10 +313,40 @@ arch_initcall(dma_channel_table_init);
>>   */
>>  struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type)
>> {
>> - return this_cpu_read(channel_table[tx_type]->chan);
>> + struct dma_chan *chan = this_cpu_read(channel_table[tx_type]->chan);
>> +
>> +#ifdef   CONFIG_DMA_ENGINE_HOTPLUG
>> + this_cpu_inc(dmaengine_chan_ref_count);
>> + if (static_key_false(&dmaengine_quiesce))
>> + chan = NULL;
>> +#endif
>> +
>> + return chan;
>>  }
>>  EXPORT_SYMBOL(dma_find_channel);
>>
>> +#ifdef   CONFIG_DMA_ENGINE_HOTPLUG
>> +struct dma_chan *dma_get_channel(struct dma_chan *chan)
>> +{
>> + if (static_key_false(&dmaengine_quiesce))
>> + atomic_inc(&dmaengine_dirty);
>> + this_cpu_inc(dmaengine_chan_ref_count);
>> +
>> + return chan;
>> +}
>> +EXPORT_SYMBOL(dma_get_channel);
>> +#endif
>> +
>> +/**
>> + * dma_has_capability - check whether any channel supports tx_type
>> + * @tx_type: transaction type
>> + */
>> +bool dma_has_capability(enum dma_transaction_type tx_type)
>> +{
>> + return !!this_cpu_read(channel_table[tx_type]->chan);
>> +}
>> +EXPORT_SYMBOL(dma_has_capability);
>> +
>>  /*
>>   * net_dma_find_channel - find a channel for net_dma
>>   * net_dma has alignment requirements
>> @@ -316,10 +354,15 @@ EXPORT_SYMBOL(dma_find_channel);
>>  struct dma_chan *net_dma_find_channel(void)
>>  {
>>   struct dma_chan *chan = dma_find_channel(DMA_MEMCPY);
>> - if (chan && !is_dma_copy_aligned(chan->device, 1, 1, 1))
>> - return NULL;
>>
>> - return chan;
>> + if (chan && is_dma_copy_aligned(chan->device, 1, 1, 1))
>> + return chan;
>> +
>> +#ifdef   CONFIG_DMA_ENGINE_HOTPLUG
>> + this_cpu_dec(dmaengine_chan_ref_count);
>> +#endif
>> +
>> + return NULL;
>>  }
>>  EXPORT_SYMBOL(net_dma_find_channel);
>>
>> @@ -393,6 +436,37 @@ static struct dma_chan *nth_chan(enum dma_transaction_type cap, int n)
>>   return ret;
>>  }
>>
>> +static void dma_channel_quiesce(void)
>> +{
>> +#ifdef   CONFIG_DMA_ENGINE_HOTPLUG
>> + int cpu;
>> + long ref_count;
>> + int quiesce_count = 0;
>> +
>> + static_key_slow_inc(&dmaengine_quiesce);
>> +
>> + do {
>> + atomic_set(&dmaengine_dirty, 0);
>> + schedule_timeout(1);
>> +
>> + if (atomic_read(&dmaengine_dirty)) {
>> + quiesce_count = 0;
>> + continue;
>> + }
>> +
>> + ref_count = 0;
>> + for_each_possible_cpu(cpu)
>> + ref_count += per_cpu(dmaengine_chan_ref_count, cpu);
>> + if (ref_count == 0)
>> + quiesce_count++;
>> + 

RE: [RFC PATCH v2 3/7] dmaengine: enhance dmaengine to support DMA device hotplug

2013-08-08 Thread Wang, Rui Y
(resend adding cc list)

Hi Jiang,
See my comments inline.

> -Original Message-
> From: Jiang Liu 
> Date: Mon, 14 May 2012 21:47:05 +0800
> Subject: [RFC PATCH v2 3/7] dmaengine: enhance dmaengine to support DMA
> device hotplug
> To: Dan Williams , Maciej Sosnowski
> , Vinod Koul 
> Cc: Jiang Liu , Keping Chen ,
> linux-kernel@vger.kernel.org, linux-...@vger.kernel.org
> 
> From: Jiang Liu 
> 
> From: Jiang Liu 
> 
> To support DMA device hotplug, dmaengine must correctly manage reference
> count for DMA channels. On the other hand, DMA hot path is performance
> critical, reference count management code should avoid polluting global shared
> cachelines, otherwise it may cause heavy performance penalty.
> 
> This patch introduces a lightweight DMA channel reference count management
> mechanism by using percpu counter. When DMA device hotplug is disabled,
> there's no performance penalty. When hotplug is enabled, a
> dma_find_channel()/dma_put_channel() pair adds two local memory accesses
> to the hot path.
> 
> Signed-off-by: Jiang Liu 
> ---
>  drivers/dma/dmaengine.c   |  112 +
>  include/linux/dmaengine.h |   29 +++-
>  2 files changed, 129 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
> index d3b1c48..eca45c0 100644
> --- a/drivers/dma/dmaengine.c
> +++ b/drivers/dma/dmaengine.c
> @@ -61,12 +61,20 @@
>  #include <linux/rculist.h>
>  #include <linux/idr.h>
>  #include <linux/slab.h>
> +#include <linux/sched.h>
> 
>  static DEFINE_MUTEX(dma_list_mutex);
>  static DEFINE_IDR(dma_idr);
>  static LIST_HEAD(dma_device_list);
>  static long dmaengine_client_count;
> 
> +#ifdef   CONFIG_DMA_ENGINE_HOTPLUG
> +static atomic_t dmaengine_dirty;
> +static struct static_key dmaengine_quiesce = STATIC_KEY_INIT_FALSE;
> +static DECLARE_WAIT_QUEUE_HEAD(dmaengine_wait_queue);
> +DEFINE_PER_CPU(long, dmaengine_chan_ref_count);
> +#endif   /* CONFIG_DMA_ENGINE_HOTPLUG */
> +
>  /* --- sysfs implementation --- */
> 
>  /**
> @@ -305,10 +313,40 @@ arch_initcall(dma_channel_table_init);
>   */
>  struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type)
> {
> - return this_cpu_read(channel_table[tx_type]->chan);
> + struct dma_chan *chan = this_cpu_read(channel_table[tx_type]->chan);
> +
> +#ifdef   CONFIG_DMA_ENGINE_HOTPLUG
> + this_cpu_inc(dmaengine_chan_ref_count);
> + if (static_key_false(&dmaengine_quiesce))
> + chan = NULL;
> +#endif
> +
> + return chan;
>  }
>  EXPORT_SYMBOL(dma_find_channel);
> 
> +#ifdef   CONFIG_DMA_ENGINE_HOTPLUG
> +struct dma_chan *dma_get_channel(struct dma_chan *chan)
> +{
> + if (static_key_false(&dmaengine_quiesce))
> + atomic_inc(&dmaengine_dirty);
> + this_cpu_inc(dmaengine_chan_ref_count);
> +
> + return chan;
> +}
> +EXPORT_SYMBOL(dma_get_channel);
> +#endif
> +
> +/**
> + * dma_has_capability - check whether any channel supports tx_type
> + * @tx_type: transaction type
> + */
> +bool dma_has_capability(enum dma_transaction_type tx_type)
> +{
> + return !!this_cpu_read(channel_table[tx_type]->chan);
> +}
> +EXPORT_SYMBOL(dma_has_capability);
> +
>  /*
>   * net_dma_find_channel - find a channel for net_dma
>   * net_dma has alignment requirements
> @@ -316,10 +354,15 @@ EXPORT_SYMBOL(dma_find_channel);
>  struct dma_chan *net_dma_find_channel(void)
>  {
>   struct dma_chan *chan = dma_find_channel(DMA_MEMCPY);
> - if (chan && !is_dma_copy_aligned(chan->device, 1, 1, 1))
> - return NULL;
> 
> - return chan;
> + if (chan && is_dma_copy_aligned(chan->device, 1, 1, 1))
> + return chan;
> +
> +#ifdef   CONFIG_DMA_ENGINE_HOTPLUG
> + this_cpu_dec(dmaengine_chan_ref_count);
> +#endif
> +
> + return NULL;
>  }
>  EXPORT_SYMBOL(net_dma_find_channel);
> 
> @@ -393,6 +436,37 @@ static struct dma_chan *nth_chan(enum dma_transaction_type cap, int n)
>   return ret;
>  }
> 
> +static void dma_channel_quiesce(void)
> +{
> +#ifdef   CONFIG_DMA_ENGINE_HOTPLUG
> + int cpu;
> + long ref_count;
> + int quiesce_count = 0;
> +
> + static_key_slow_inc(&dmaengine_quiesce);
> +
> + do {
> + atomic_set(&dmaengine_dirty, 0);
> + schedule_timeout(1);
> +
> + if (atomic_read(&dmaengine_dirty)) {
> + quiesce_count = 0;
> + continue;
> + }
> +
> + ref_count = 0;
> + for_each_possible_cpu(cpu)
> + ref_count += per_cpu(dmaengine_chan_ref_count, cpu);
> + if (ref_count == 0)
> + quiesce_count++;
> + else
> + quiesce_count = 0;
> + } while (quiesce_count < 10);
> +
> + static_key_slow_dec(&dmaengine_quiesce);
> +#endif
> +}
> +

The purpose of dma_channel_quiesce() is unclear to me. It waits until 
dmaengine_chan_ref_count is 0, but how do you prevent someone from using dma 
after it returns?
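
To make the question concrete, the window being asked about can be sketched as
follows, assuming the (truncated) dma_async_device_unregister() hunk calls
dma_channel_quiesce() before tearing its channels down; this is a reading of
the patch, not quoted code, and the teardown step is only a placeholder.

/* Hypothetical sequence on the unregister side; not quoted from the patch. */
static void unregister_side_sketch(struct dma_device *device)
{
        /* Waits until the summed per-CPU dmaengine_chan_ref_count stays at
         * zero across ten consecutive checks, then drops the
         * dmaengine_quiesce key again. */
        dma_channel_quiesce();

        /*
         * Once the static key is dropped, dma_find_channel() stops forcing
         * NULL.  Unless channel_table[] was already purged of this device's
         * channels before the quiesce, a reader on another CPU can pick one
         * up right here ...
         */

        /* ... while this path goes on to unlist and free device->channels. */
}
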



> @@ -767,14 +848,22 @@ void dma_async_device_unregister(struct dma_device *device)

RE: [RFC PATCH v2 3/7] dmaengine: enhance dmaengine to support DMA device hotplug

2013-08-08 Thread Wang, Rui Y
Hi Jiang,
See my comments inline.

> -Original Message-
> From: Jiang Liu 
> Date: Mon, 14 May 2012 21:47:05 +0800
> Subject: [RFC PATCH v2 3/7] dmaengine: enhance dmaengine to support DMA
> device hotplug
> To: Dan Williams , Maciej Sosnowski
> , Vinod Koul 
> Cc: Jiang Liu , Keping Chen ,
> linux-kernel@vger.kernel.org, linux-...@vger.kernel.org
> 
> From: Jiang Liu 
> 
> From: Jiang Liu 
> 
> To support DMA device hotplug, dmaengine must correctly manage reference
> count for DMA channels. On the other hand, DMA hot path is performance
> critical, reference count management code should avoid polluting global shared
> cachelines, otherwise it may cause heavy performance penalty.
> 
> This patch introduces a lightweight DMA channel reference count management
> mechanism by using percpu counter. When DMA device hotplug is disabled,
> there's no performance penalty. When hotplug is enabled, a
> dma_find_channel()/dma_put_channel() pair adds two local memory accesses
> to the hot path.
> 
> Signed-off-by: Jiang Liu 
> ---
>  drivers/dma/dmaengine.c   |  112 +
>  include/linux/dmaengine.h |   29 +++-
>  2 files changed, 129 insertions(+), 12 deletions(-)
> 
> diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
> index d3b1c48..eca45c0 100644
> --- a/drivers/dma/dmaengine.c
> +++ b/drivers/dma/dmaengine.c
> @@ -61,12 +61,20 @@
>  #include <linux/rculist.h>
>  #include <linux/idr.h>
>  #include <linux/slab.h>
> +#include <linux/sched.h>
> 
>  static DEFINE_MUTEX(dma_list_mutex);
>  static DEFINE_IDR(dma_idr);
>  static LIST_HEAD(dma_device_list);
>  static long dmaengine_client_count;
> 
> +#ifdef   CONFIG_DMA_ENGINE_HOTPLUG
> +static atomic_t dmaengine_dirty;
> +static struct static_key dmaengine_quiesce = STATIC_KEY_INIT_FALSE;
> +static DECLARE_WAIT_QUEUE_HEAD(dmaengine_wait_queue);
> +DEFINE_PER_CPU(long, dmaengine_chan_ref_count);
> +#endif   /* CONFIG_DMA_ENGINE_HOTPLUG */
> +
>  /* --- sysfs implementation --- */
> 
>  /**
> @@ -305,10 +313,40 @@ arch_initcall(dma_channel_table_init);
>   */
>  struct dma_chan *dma_find_channel(enum dma_transaction_type tx_type)
> {
> - return this_cpu_read(channel_table[tx_type]->chan);
> + struct dma_chan *chan = this_cpu_read(channel_table[tx_type]->chan);
> +
> +#ifdef   CONFIG_DMA_ENGINE_HOTPLUG
> + this_cpu_inc(dmaengine_chan_ref_count);
> + if (static_key_false(&dmaengine_quiesce))
> + chan = NULL;
> +#endif
> +
> + return chan;
>  }
>  EXPORT_SYMBOL(dma_find_channel);
> 
> +#ifdef   CONFIG_DMA_ENGINE_HOTPLUG
> +struct dma_chan *dma_get_channel(struct dma_chan *chan)
> +{
> + if (static_key_false(&dmaengine_quiesce))
> + atomic_inc(&dmaengine_dirty);
> + this_cpu_inc(dmaengine_chan_ref_count);
> +
> + return chan;
> +}
> +EXPORT_SYMBOL(dma_get_channel);
> +#endif
> +
> +/**
> + * dma_has_capability - check whether any channel supports tx_type
> + * @tx_type: transaction type
> + */
> +bool dma_has_capability(enum dma_transaction_type tx_type)
> +{
> + return !!this_cpu_read(channel_table[tx_type]->chan);
> +}
> +EXPORT_SYMBOL(dma_has_capability);
> +
>  /*
>   * net_dma_find_channel - find a channel for net_dma
>   * net_dma has alignment requirements
> @@ -316,10 +354,15 @@ EXPORT_SYMBOL(dma_find_channel);
>  struct dma_chan *net_dma_find_channel(void)
>  {
>   struct dma_chan *chan = dma_find_channel(DMA_MEMCPY);
> - if (chan && !is_dma_copy_aligned(chan->device, 1, 1, 1))
> - return NULL;
> 
> - return chan;
> + if (chan && is_dma_copy_aligned(chan->device, 1, 1, 1))
> + return chan;
> +
> +#ifdef   CONFIG_DMA_ENGINE_HOTPLUG
> + this_cpu_dec(dmaengine_chan_ref_count);
> +#endif
> +
> + return NULL;
>  }
>  EXPORT_SYMBOL(net_dma_find_channel);
> 
> @@ -393,6 +436,37 @@ static struct dma_chan *nth_chan(enum dma_transaction_type cap, int n)
>   return ret;
>  }
> 
> +static void dma_channel_quiesce(void)
> +{
> +#ifdef   CONFIG_DMA_ENGINE_HOTPLUG
> + int cpu;
> + long ref_count;
> + int quiesce_count = 0;
> +
> + static_key_slow_inc(&dmaengine_quiesce);
> +
> + do {
> + atomic_set(&dmaengine_dirty, 0);
> + schedule_timeout(1);
> +
> + if (atomic_read(&dmaengine_dirty)) {
> + quiesce_count = 0;
> + continue;
> + }
> +
> + ref_count = 0;
> + for_each_possible_cpu(cpu)
> + ref_count += per_cpu(dmaengine_chan_ref_count, cpu);
> + if (ref_count == 0)
> + quiesce_count++;
> + else
> + quiesce_count = 0;
> + } while (quiesce_count < 10);
> +
> + static_key_slow_dec(&dmaengine_quiesce);
> +#endif
> +}
> +

The purpose of dma_channel_quiesce() is unclear to me. It waits until 
dmaengine_chan_ref_count is 0, but how do you prevent someone from using dma 
after it returns?



> @@ -767,14 +848,22 @@ void dma_async_device_unregister(struct dma_device *device)
