Re: [PATCH] rcu: fix the OOM problem of huge IP abnormal packet traffic

2016-11-18 Thread Ding Tianhong


On 2016/11/18 21:01, Paul E. McKenney wrote:
> On Fri, Nov 18, 2016 at 08:40:09PM +0800, Ding Tianhong wrote:
>> The commit bedc196915 ("rcu: Fix soft lockup for rcu_nocb_kthread")
>> will introduce a new problem that when huge IP abnormal packet arrived,
>> it may cause OOM and break the kernel, just like this:
>>
>> [   79.441538] mlx4_en: eth5: Leaving promiscuous mode steering mode:2
>> [  100.067032] ksoftirqd/0: page allocation failure: order:0, mode:0x120
>> [  100.067038] CPU: 0 PID: 3 Comm: ksoftirqd/0 Tainted: G   OE  
>> V---   3.10.0-327.28.3.28.x86_64 #1
>> [  100.067039] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
>> rel-1.9.1-0-gb3ef39f-20161018_184732-HGH103483 04/01/2014
>> [  100.067041]  0120 b080d798 8802afd5b968 
>> 81638cb9
>> [  100.067045]  8802afd5b9f8 81171380 0010 
>> 
>> [  100.067048]  8802befd8000  0001 
>> b080d798
>> [  100.067050] Call Trace:
>> [  100.067057]  [] dump_stack+0x19/0x1b
>> [  100.067062]  [] warn_alloc_failed+0x110/0x180
>> [  100.067066]  [] __alloc_pages_nodemask+0x9b6/0xba0
>> [  100.067070]  [] ? skb_add_rx_frag+0x90/0xb0
>> [  100.067075]  [] alloc_pages_current+0xaa/0x170
>> [  100.067080]  [] mlx4_alloc_pages.isra.24+0x40/0x170 
>> [mlx4_en]
>> [  100.067083]  [] mlx4_en_alloc_frags+0xdc/0x220 [mlx4_en]
>> [  100.067086]  [] ? __netif_receive_skb+0x18/0x60
>> [  100.067088]  [] ? netif_receive_skb+0x40/0xc0
>> [  100.067092]  [] mlx4_en_process_rx_cq+0x5f1/0xec0 
>> [mlx4_en]
>> [  100.067095]  [] ? list_del+0xd/0x30
>> [  100.067098]  [] ? __napi_complete+0x1f/0x30
>> [  100.067101]  [] mlx4_en_poll_rx_cq+0x9f/0x170 [mlx4_en]
>> [  100.067103]  [] net_rx_action+0x152/0x240
>> [  100.067107]  [] __do_softirq+0xef/0x280
>> [  100.067109]  [] run_ksoftirqd+0x30/0x50
>> [  100.067114]  [] smpboot_thread_fn+0xff/0x1a0
>> [  100.067117]  [] ? schedule+0x29/0x70
>> [  100.067120]  [] ? lg_double_unlock+0x90/0x90
>> [  100.067122]  [] kthread+0xcf/0xe0
>> [  100.067124]  [] ? kthread_create_on_node+0x140/0x140
>> [  100.067127]  [] ret_from_fork+0x58/0x90
>> [  100.067129]  [] ? kthread_create_on_node+0x140/0x140
>>
>> cut here=
>>
>> The reason is that the huge abnormal IP packet will be received to net stack
>> and be dropped finally by dst_release, and the dst_release would use the 
>> rcuos
>> callback-offload kthread to free the packet, but the cond_resched_rcu_qs() 
>> will
>> calling do_softirq() to receive more and more IP abnormal packets which will 
>> be
>> throw into the RCU callbacks again later, the number of received packet is 
>> much
>> greater than the number of packets freed, it will exhaust the memory and 
>> then OOM,
>> so don't try to process any pending softirqs in the rcuos callback-offload 
>> kthread
>> is a more effective solution.
> 
> OK, but we could still have softirqs processed by the grace-period kthread
> as a result of any number of other events.  So this change might reduce
> the probability of this problem, but it doesn't eliminate it.
> 
> How huge are these huge IP packets?  Is the underlying problem that they
> are too large to use the memory-allocator fastpaths?
> 
>   Thanx, Paul
> 

I use a 40G Mellanox NIC to receive packets, and the Testgine can send MAC 
abnormal packets and
IP abnormal packets at full speed.

The Mac abnormal packet would be dropped at low level and not be received to 
net stack,
but the IP abnormal packet will introduce this problem, every packet will looks 
as new dst first and
release later by dst_release because it is meaningless.

dst_release->call_rcu(&dst->rcu_head, dst_destroy_rcu);

so all packet will be freed until the rcuos callback-offload kthread 
processing, it will be a infinite loop
if huge packet is coming because the do_softirq will load more and more packet 
to the rcuos processing kthread,
so I still could not find a better way to fix this, btw, it is really hard to 
say the driver use too large memory-allocator
fastpaths, there is no memory leak and the Ixgbe may meet the same problem too.

Thanks.
Ding


>> Fix commit bedc196915 ("rcu: Fix soft lockup for rcu_nocb_kthread")
>> Signed-off-by: Ding Tianhong 
>>
>> Signed-off-by: Ding Tianhong 
>> ---
>>  kernel/rcu/tree_plugin.h | 3 +--
>>  1 file changed, 1 insertion(+), 2 deletions(-)
>>
>> diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
>> index 85c5a88..760c3b5 100644
>> --- a/kernel/rcu/tree_plugin.h
>> +++ b/kernel/rcu/tree_plugin.h
>> @@ -2172,8 +2172,7 @@ static int rcu_nocb_kthread(void *arg)
>>  if (__rcu_reclaim(rdp->rsp->name, list))
>>  cl++;
>>  c++;
>> -local_bh_enable();
>> -

Re: [PATCH] rcu: fix the OOM problem of huge IP abnormal packet traffic

2016-11-18 Thread Ding Tianhong


On 2016/11/18 21:01, Paul E. McKenney wrote:
> On Fri, Nov 18, 2016 at 08:40:09PM +0800, Ding Tianhong wrote:
>> The commit bedc196915 ("rcu: Fix soft lockup for rcu_nocb_kthread")
>> will introduce a new problem that when huge IP abnormal packet arrived,
>> it may cause OOM and break the kernel, just like this:
>>
>> [   79.441538] mlx4_en: eth5: Leaving promiscuous mode steering mode:2
>> [  100.067032] ksoftirqd/0: page allocation failure: order:0, mode:0x120
>> [  100.067038] CPU: 0 PID: 3 Comm: ksoftirqd/0 Tainted: G   OE  
>> V---   3.10.0-327.28.3.28.x86_64 #1
>> [  100.067039] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 
>> rel-1.9.1-0-gb3ef39f-20161018_184732-HGH103483 04/01/2014
>> [  100.067041]  0120 b080d798 8802afd5b968 
>> 81638cb9
>> [  100.067045]  8802afd5b9f8 81171380 0010 
>> 
>> [  100.067048]  8802befd8000  0001 
>> b080d798
>> [  100.067050] Call Trace:
>> [  100.067057]  [] dump_stack+0x19/0x1b
>> [  100.067062]  [] warn_alloc_failed+0x110/0x180
>> [  100.067066]  [] __alloc_pages_nodemask+0x9b6/0xba0
>> [  100.067070]  [] ? skb_add_rx_frag+0x90/0xb0
>> [  100.067075]  [] alloc_pages_current+0xaa/0x170
>> [  100.067080]  [] mlx4_alloc_pages.isra.24+0x40/0x170 
>> [mlx4_en]
>> [  100.067083]  [] mlx4_en_alloc_frags+0xdc/0x220 [mlx4_en]
>> [  100.067086]  [] ? __netif_receive_skb+0x18/0x60
>> [  100.067088]  [] ? netif_receive_skb+0x40/0xc0
>> [  100.067092]  [] mlx4_en_process_rx_cq+0x5f1/0xec0 
>> [mlx4_en]
>> [  100.067095]  [] ? list_del+0xd/0x30
>> [  100.067098]  [] ? __napi_complete+0x1f/0x30
>> [  100.067101]  [] mlx4_en_poll_rx_cq+0x9f/0x170 [mlx4_en]
>> [  100.067103]  [] net_rx_action+0x152/0x240
>> [  100.067107]  [] __do_softirq+0xef/0x280
>> [  100.067109]  [] run_ksoftirqd+0x30/0x50
>> [  100.067114]  [] smpboot_thread_fn+0xff/0x1a0
>> [  100.067117]  [] ? schedule+0x29/0x70
>> [  100.067120]  [] ? lg_double_unlock+0x90/0x90
>> [  100.067122]  [] kthread+0xcf/0xe0
>> [  100.067124]  [] ? kthread_create_on_node+0x140/0x140
>> [  100.067127]  [] ret_from_fork+0x58/0x90
>> [  100.067129]  [] ? kthread_create_on_node+0x140/0x140
>>
>> cut here=
>>
>> The reason is that the huge abnormal IP packet will be received to net stack
>> and be dropped finally by dst_release, and the dst_release would use the 
>> rcuos
>> callback-offload kthread to free the packet, but the cond_resched_rcu_qs() 
>> will
>> calling do_softirq() to receive more and more IP abnormal packets which will 
>> be
>> throw into the RCU callbacks again later, the number of received packet is 
>> much
>> greater than the number of packets freed, it will exhaust the memory and 
>> then OOM,
>> so don't try to process any pending softirqs in the rcuos callback-offload 
>> kthread
>> is a more effective solution.
> 
> OK, but we could still have softirqs processed by the grace-period kthread
> as a result of any number of other events.  So this change might reduce
> the probability of this problem, but it doesn't eliminate it.
> 
> How huge are these huge IP packets?  Is the underlying problem that they
> are too large to use the memory-allocator fastpaths?
> 
>   Thanx, Paul
> 

I use a 40G Mellanox NIC to receive packets, and the Testgine can send MAC 
abnormal packets and
IP abnormal packets at full speed.

The Mac abnormal packet would be dropped at low level and not be received to 
net stack,
but the IP abnormal packet will introduce this problem, every packet will looks 
as new dst first and
release later by dst_release because it is meaningless.

dst_release->call_rcu(&dst->rcu_head, dst_destroy_rcu);

so all packet will be freed until the rcuos callback-offload kthread 
processing, it will be a infinite loop
if huge packet is coming because the do_softirq will load more and more packet 
to the rcuos processing kthread,
so I still could not find a better way to fix this, btw, it is really hard to 
say the driver use too large memory-allocator
fastpaths, there is no memory leak and the Ixgbe may meet the same problem too.

Thanks.
Ding


>> Fix commit bedc196915 ("rcu: Fix soft lockup for rcu_nocb_kthread")
>> Signed-off-by: Ding Tianhong 
>>
>> Signed-off-by: Ding Tianhong 
>> ---
>>  kernel/rcu/tree_plugin.h | 3 +--
>>  1 file changed, 1 insertion(+), 2 deletions(-)
>>
>> diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
>> index 85c5a88..760c3b5 100644
>> --- a/kernel/rcu/tree_plugin.h
>> +++ b/kernel/rcu/tree_plugin.h
>> @@ -2172,8 +2172,7 @@ static int rcu_nocb_kthread(void *arg)
>>  if (__rcu_reclaim(rdp->rsp->name, list))
>>  cl++;
>>  c++;
>> -local_bh_enable();
>> -cond_resched_rcu_qs();
>> +

Re: [GIT PULL] LED updates for 4.9-rc6

2016-11-18 Thread Jacek Anaszewski

On 11/18/2016 06:07 PM, Linus Torvalds wrote:

On Fri, Nov 18, 2016 at 6:13 AM, Jacek Anaszewski
 wrote:


Please pull LED subsystem related updates for MAINTAINERS.
I'd like to announce a new co-maintainer - Pavel Machek.


No can do.

You've done something incredibly odd, including rebasing my 4.9-rc4
commit (and four commits before that from other people).


Sorry, I didn't pay enough attention while generating this pull request.

I have the commit to merge also on top of for-next branch.
I copied that branch locally, executed git rebase -i HEAD~20, removed
all preceding commits up to v4.9-rc4 and pushed out. I forgot that git
touches all commits within the range passed to git rebase -i. Certainly
I should have used git rebase --onto, as in my usual workflow.

I've just generated new pull request in the right way.

--
Best regards,
Jacek Anaszewski


Re: [GIT PULL] LED updates for 4.9-rc6

2016-11-18 Thread Jacek Anaszewski

On 11/18/2016 06:07 PM, Linus Torvalds wrote:

On Fri, Nov 18, 2016 at 6:13 AM, Jacek Anaszewski
 wrote:


Please pull LED subsystem related updates for MAINTAINERS.
I'd like to announce a new co-maintainer - Pavel Machek.


No can do.

You've done something incredibly odd, including rebasing my 4.9-rc4
commit (and four commits before that from other people).


Sorry, I didn't pay enough attention while generating this pull request.

I have the commit to merge also on top of for-next branch.
I copied that branch locally, executed git rebase -i HEAD~20, removed
all preceding commits up to v4.9-rc4 and pushed out. I forgot that git
touches all commits within the range passed to git rebase -i. Certainly
I should have used git rebase --onto, as in my usual workflow.

I've just generated new pull request in the right way.

--
Best regards,
Jacek Anaszewski


Re: [PATCH v5 0/3] spi-nor: Add support for Intel SPI serial flash controller

2016-11-18 Thread Mika Westerberg
On Fri, Nov 18, 2016 at 07:04:26PM +0000, Lee Jones wrote:
> On Mon, 14 Nov 2016, Mika Westerberg wrote:
> 
> > This is fifth version of the series. You can find the previous versions
> > archived on:
> > 
> >   v4: https://lwn.net/Articles/703773/
> >   v3: https://lwn.net/Articles/697231/
> >   v2: http://lists.infradead.org/pipermail/linux-mtd/2016-June/068277.html
> >   v1: https://lkml.org/lkml/2016/6/14/269
> > 
> > This patch series adds support for the Intel SPI serial flash controller
> > found on many recent Intel CPUs including Baytrail and Braswell. This
> > driver makes it possible to access the BIOS and other platform data which
> > is stored on the SPI serial flash. It is also possible to upgrade the BIOS
> > using this driver if it has not been protected by special hardware bits.
> > 
> > The patch [1/3] includes documentation how to upgrade BIOS on MinnowBoard
> > MAX.
> > 
> > Since poking the SPI serial flash can brick the machine, this driver can
> > only be enabled when CONFIG_EXPERT=y and even then it will remain read-only
> > unless instructed otherwise by module parameter.
> > 
> > Changes from v4:
> >   * Use INTEL_SPI_FIFO_SZ instead of hard coded value of 64 bytes
> >   * Don't increment i inside call to FDATA() macro
> >   * Check nor->read_opcode in intel_spi_read() and return
> > -EINVAL if not supported. We may add SFDP support later on.
> > 
> > Changes from v3:
> >   * Added ACKs from Lee Jones.
> >   * Use bus instead of dev->bus in PCI accesses
> > 
> > Changes from v2:
> >   * Rebased on top of v4.8-rc2
> >   * Updated intel_spi_read/write() according spi-nor core changes which
> > drops retlen parameter and returns number of bytes read/written.
> > 
> > Changes from v1:
> >   * Older hardware does not support 64k erase command so added erase_64k
> > flag which is set only for Broxton (BXT).
> >   * Fix protection range offset for Broxton. Now there is ispi->pregs
> > pointing to the start of the protection registers.
> >   * Change naming of constants from BCR_BYT -> BYT_BCR and so on.
> >   * Drop lpc_ich_finalize_spi_cell() and initialize cell directly in
> > lpc_ich_init_spi().
> >   * Use info->type in switch in lpc_ich_init_spi().
> >   * Add defines for magic numbers used in lpc_ich_init_spi().
> >   * Use PLATFORM_DEVID_NONE with mfd_add_devices().
> > 
> > Mika Westerberg (3):
> >   spi-nor: Add support for Intel SPI serial flash controller
> >   mfd: lpc_ich: Add support for SPI serial flash host controller
> >   mfd: lpc_ich: Add support for Intel Apollo Lake SoC
> 
> What's the plan for this set?

I was hoping to get this merged via MTD tree but I haven't got many
comments from the maintainers. No idea if anyone is going to take this :-(


Re: [PATCH v5 0/3] spi-nor: Add support for Intel SPI serial flash controller

2016-11-18 Thread Mika Westerberg
On Fri, Nov 18, 2016 at 07:04:26PM +0000, Lee Jones wrote:
> On Mon, 14 Nov 2016, Mika Westerberg wrote:
> 
> > This is fifth version of the series. You can find the previous versions
> > archived on:
> > 
> >   v4: https://lwn.net/Articles/703773/
> >   v3: https://lwn.net/Articles/697231/
> >   v2: http://lists.infradead.org/pipermail/linux-mtd/2016-June/068277.html
> >   v1: https://lkml.org/lkml/2016/6/14/269
> > 
> > This patch series adds support for the Intel SPI serial flash controller
> > found on many recent Intel CPUs including Baytrail and Braswell. This
> > driver makes it possible to access the BIOS and other platform data which
> > is stored on the SPI serial flash. It is also possible to upgrade the BIOS
> > using this driver if it has not been protected by special hardware bits.
> > 
> > The patch [1/3] includes documentation how to upgrade BIOS on MinnowBoard
> > MAX.
> > 
> > Since poking the SPI serial flash can brick the machine, this driver can
> > only be enabled when CONFIG_EXPERT=y and even then it will remain read-only
> > unless instructed otherwise by module parameter.
> > 
> > Changes from v4:
> >   * Use INTEL_SPI_FIFO_SZ instead of hard coded value of 64 bytes
> >   * Don't increment i inside call to FDATA() macro
> >   * Check nor->read_opcode in intel_spi_read() and return
> > -EINVAL if not supported. We may add SFDP support later on.
> > 
> > Changes from v3:
> >   * Added ACKs from Lee Jones.
> >   * Use bus instead of dev->bus in PCI accesses
> > 
> > Changes from v2:
> >   * Rebased on top of v4.8-rc2
> >   * Updated intel_spi_read/write() according spi-nor core changes which
> > drops retlen parameter and returns number of bytes read/written.
> > 
> > Changes from v1:
> >   * Older hardware does not support 64k erase command so added erase_64k
> > flag which is set only for Broxton (BXT).
> >   * Fix protection range offset for Broxton. Now there is ispi->pregs
> > pointing to the start of the protection registers.
> >   * Change naming of constants from BCR_BYT -> BYT_BCR and so on.
> >   * Drop lpc_ich_finalize_spi_cell() and initialize cell directly in
> > lpc_ich_init_spi().
> >   * Use info->type in switch in lpc_ich_init_spi().
> >   * Add defines for magic numbers used in lpc_ich_init_spi().
> >   * Use PLATFORM_DEVID_NONE with mfd_add_devices().
> > 
> > Mika Westerberg (3):
> >   spi-nor: Add support for Intel SPI serial flash controller
> >   mfd: lpc_ich: Add support for SPI serial flash host controller
> >   mfd: lpc_ich: Add support for Intel Apollo Lake SoC
> 
> What's the plan for this set?

I was hoping to get this merged via MTD tree but I haven't got many
comments from the maintainers. No idea if anyone is going to take this :-(


[GIT PULL v2] LED updates for 4.9-rc6

2016-11-18 Thread Jacek Anaszewski
Hi Linus,

Please pull LED subsystem related updates for MAINTAINERS.
I'd like to announce a new co-maintainer - Pavel Machek.

The following changes since commit bc33b0ca11e3df467777a4fa7639ba488c9d4911:

  Linux 4.9-rc4 (2016-11-05 16:23:36 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/j.anaszewski/linux-leds.git 
tags/leds_4.9-rc6

for you to fetch changes up to dbfa048db97c15ee3fff2ee17b19e61f3ab12d53:

  MAINTAINERS: Add LED subsystem co-maintainer (2016-11-18 23:56:10 +0100)

Thanks,
Jacek Anaszewski


- add LED subsystem co-maintainer


Pavel Machek (1):
  MAINTAINERS: Add LED subsystem co-maintainer

 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)


[GIT PULL v2] LED updates for 4.9-rc6

2016-11-18 Thread Jacek Anaszewski
Hi Linus,

Please pull LED subsystem related updates for MAINTAINERS.
I'd like to announce a new co-maintainer - Pavel Machek.

The following changes since commit bc33b0ca11e3df467777a4fa7639ba488c9d4911:

  Linux 4.9-rc4 (2016-11-05 16:23:36 -0700)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/j.anaszewski/linux-leds.git 
tags/leds_4.9-rc6

for you to fetch changes up to dbfa048db97c15ee3fff2ee17b19e61f3ab12d53:

  MAINTAINERS: Add LED subsystem co-maintainer (2016-11-18 23:56:10 +0100)

Thanks,
Jacek Anaszewski


- add LED subsystem co-maintainer


Pavel Machek (1):
  MAINTAINERS: Add LED subsystem co-maintainer

 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)


Re: [PATCH] rcu: Fix soft lockup for rcu_nocb_kthread

2016-11-18 Thread Ding Tianhong


On 2016/11/18 20:56, Paul E. McKenney wrote:
> On Fri, Nov 18, 2016 at 08:37:28PM +0800, Ding Tianhong wrote:
>>
>>
>> On 2016/8/10 9:59, Paul E. McKenney wrote:
>>> On Wed, Aug 10, 2016 at 09:13:14AM +0800, Ding Tianhong wrote:
 On 2016/6/16 22:19, Paul E. McKenney wrote:
> On Thu, Jun 16, 2016 at 02:09:47PM +0800, Ding Tianhong wrote:
>> On 2016/6/15 23:49, Paul E. McKenney wrote:
>>> On Wed, Jun 15, 2016 at 03:27:36PM +0800, Ding Tianhong wrote:
 I met this problem when using the Testgine to send package to ixgbevf 
 nic
 by this steps:
 1. Connect to ixgbevf, and set the speed to 10Gb/s, it could work fine.
 2. Then use ifconfig to down the nic and up again, loop for several 
 times.
 3. The system panic by soft lockup.
>>>
>>> Good catch, queued for review and testing.  But what .config was your
>>> kernel built with?
>>>
>>
>> I use the redhat7.1 defconfig to build my kernel, and the RCU config is 
>> this:
>>  120 #
>>  121 # RCU Subsystem
>>  122 #
>>  123 CONFIG_TREE_RCU=y
>>  124 # CONFIG_PREEMPT_RCU is not set
>>  125 CONFIG_RCU_STALL_COMMON=y
>>  126 CONFIG_CONTEXT_TRACKING=y
>>  127 CONFIG_RCU_USER_QS=y
>>  128 # CONFIG_CONTEXT_TRACKING_FORCE is not set
>>  129 CONFIG_RCU_FANOUT=64
>>  130 CONFIG_RCU_FANOUT_LEAF=16
>>  131 # CONFIG_RCU_FANOUT_EXACT is not set
>>  132 # CONFIG_RCU_FAST_NO_HZ is not set
>>  133 # CONFIG_TREE_RCU_TRACE is not set
>>  134 CONFIG_RCU_NOCB_CPU=y
>>  135 CONFIG_RCU_NOCB_CPU_ALL=y
>>  136 CONFIG_BUILD_BIN2C=y
>
> Thank you!  You were running with preemption disabled, so your system
> would indeed be very susceptible to this problem.
>
>>> Also, I did tweak both the commit log and the patch.  Your 
>>> cond_resched()
>>> would prevent soft lockups, but not RCU stalls, so I substituted
>>> cond_resched_rcu_qs().  Please let me know if either of those changes
>>> causes problems at your end.
>>
>> Looks fine to me, I will apply this to my branch and test it, thanks.
>
> Please let me know how it goes!
>
>   Thanx, Paul
>

 Hi Paul:

 It has been a long time after applying this patch, and didn't found any 
 problem, I believe this patch is fine, thanks.
>>>
>>> Very good!  I will push this one upstream during the next merge window.
>>>
>>> Thanx, Paul
>>>
>>
>> Hi Paul:
>>
>> Sorry to say that I have found this patch will introduce an OOM problem, it 
>> will be triggered by huge IP abnormal packet
>> arrived, it looks that avoid process any pending softirqs in the rcuos 
>> kthread is the best way to fix this problem, I will
>> send a new patch to revert this and fix the problem.
> 
> Interesting...
> 
> Could you please let me know exactly how the added cond_resched_rcu_qs()
> leads to an OOM?  Is it that the softirqs prevent the grace-period kthread
> from making progress?
> 

Ok, reply and discuss on other patch, thanks.

Ding

>   Thanx, Paul
> 
> 
> .
> 



Re: [PATCH] rcu: Fix soft lockup for rcu_nocb_kthread

2016-11-18 Thread Ding Tianhong


On 2016/11/18 20:56, Paul E. McKenney wrote:
> On Fri, Nov 18, 2016 at 08:37:28PM +0800, Ding Tianhong wrote:
>>
>>
>> On 2016/8/10 9:59, Paul E. McKenney wrote:
>>> On Wed, Aug 10, 2016 at 09:13:14AM +0800, Ding Tianhong wrote:
 On 2016/6/16 22:19, Paul E. McKenney wrote:
> On Thu, Jun 16, 2016 at 02:09:47PM +0800, Ding Tianhong wrote:
>> On 2016/6/15 23:49, Paul E. McKenney wrote:
>>> On Wed, Jun 15, 2016 at 03:27:36PM +0800, Ding Tianhong wrote:
 I met this problem when using the Testgine to send package to ixgbevf 
 nic
 by this steps:
 1. Connect to ixgbevf, and set the speed to 10Gb/s, it could work fine.
 2. Then use ifconfig to down the nic and up again, loop for several 
 times.
 3. The system panic by soft lockup.
>>>
>>> Good catch, queued for review and testing.  But what .config was your
>>> kernel built with?
>>>
>>
>> I use the redhat7.1 defconfig to build my kernel, and the RCU config is 
>> this:
>>  120 #
>>  121 # RCU Subsystem
>>  122 #
>>  123 CONFIG_TREE_RCU=y
>>  124 # CONFIG_PREEMPT_RCU is not set
>>  125 CONFIG_RCU_STALL_COMMON=y
>>  126 CONFIG_CONTEXT_TRACKING=y
>>  127 CONFIG_RCU_USER_QS=y
>>  128 # CONFIG_CONTEXT_TRACKING_FORCE is not set
>>  129 CONFIG_RCU_FANOUT=64
>>  130 CONFIG_RCU_FANOUT_LEAF=16
>>  131 # CONFIG_RCU_FANOUT_EXACT is not set
>>  132 # CONFIG_RCU_FAST_NO_HZ is not set
>>  133 # CONFIG_TREE_RCU_TRACE is not set
>>  134 CONFIG_RCU_NOCB_CPU=y
>>  135 CONFIG_RCU_NOCB_CPU_ALL=y
>>  136 CONFIG_BUILD_BIN2C=y
>
> Thank you!  You were running with preemption disabled, so your system
> would indeed be very susceptible to this problem.
>
>>> Also, I did tweak both the commit log and the patch.  Your 
>>> cond_resched()
>>> would prevent soft lockups, but not RCU stalls, so I substituted
>>> cond_resched_rcu_qs().  Please let me know if either of those changes
>>> causes problems at your end.
>>
>> Looks fine to me, I will apply this to my branch and test it, thanks.
>
> Please let me know how it goes!
>
>   Thanx, Paul
>

 Hi Paul:

 It has been a long time after applying this patch, and didn't found any 
 problem, I believe this patch is fine, thanks.
>>>
>>> Very good!  I will push this one upstream during the next merge window.
>>>
>>> Thanx, Paul
>>>
>>
>> Hi Paul:
>>
>> Sorry to say that I have found this patch will introduce an OOM problem, it 
>> will be triggered by huge IP abnormal packet
>> arrived, it looks that avoid process any pending softirqs in the rcuos 
>> kthread is the best way to fix this problem, I will
>> send a new patch to revert this and fix the problem.
> 
> Interesting...
> 
> Could you please let me know exactly how the added cond_resched_rcu_qs()
> leads to an OOM?  Is it that the softirqs prevent the grace-period kthread
> from making progress?
> 

Ok, reply and discuss on other patch, thanks.

Ding

>   Thanx, Paul
> 
> 
> .
> 



Re: [REVIEW][PATCH 0/3] Fixing ptrace vs exec vs userns interactions

2016-11-18 Thread Willy Tarreau
Hi Eric,

On Thu, Nov 17, 2016 at 11:02:47AM -0600, Eric W. Biederman wrote:
> 
> With everyone heading to Kernel Summit and Plumbers I put this set of
> patches down temporarily.   Now is the time to take it back up and to
> make certain I am not missing something stupid in this set of patches.

I couldn't get your patch set to apply to any of the kernels I tried,
I manually adjusted some parts but the second one has too many rejects.
What kernel should I apply this to ? Or maybe some preliminary patches
are needed ?

Thanks,
Willy


Re: [REVIEW][PATCH 0/3] Fixing ptrace vs exec vs userns interactions

2016-11-18 Thread Willy Tarreau
Hi Eric,

On Thu, Nov 17, 2016 at 11:02:47AM -0600, Eric W. Biederman wrote:
> 
> With everyone heading to Kernel Summit and Plumbers I put this set of
> patches down temporarily.   Now is the time to take it back up and to
> make certain I am not missing something stupid in this set of patches.

I couldn't get your patch set to apply to any of the kernels I tried,
I manually adjusted some parts but the second one has too many rejects.
What kernel should I apply this to ? Or maybe some preliminary patches
are needed ?

Thanks,
Willy


RE: [RFC][PATCH 7/7] kref: Implement using refcount_t

2016-11-18 Thread Reshetova, Elena

> On Fri, Nov 18, 2016 at 04:58:52PM +0000, Reshetova, Elena wrote:
> > > Could you please fix your mailer to not unwrap the emails?
> >
> > I wish I understand what you mean by "unwrap"... ?
> 
> Where I always have lines wrapped at 78 characters, but often when I see
> them back in your reply, they're unwrapped and go on forever.
> 
> For some reason your mailer reflows text and mucks with whitespace. I
> know Outlook likes to do this by default.

Ok, I think I managed to fix it. Hope it looks better now. 
 
> > On Fri, Nov 18, 2016 at 10:47:40AM +0000, Reshetova, Elena wrote:
> 
> > > Oh, and if we define refcount_t to be just atomic_t underneath, what
> > > about the other atomic_long_t, local_t and atomic64_t cases when it is
> > > used for refcounting?  I don't feel good just simply changing them to
> > > become atomic_t under refcount_t wrapper.
> >
> > > Is there anybody using local_t ? That seems 'creative' and highly
> questionable.
> > I am not yet sure about refcounts, but local_t itself is used in couple of 
> > places.
> 
> Sure, there's local_t usage, but I'd be very surprised if there's a
> single refcount usage among them.
> 
> > >As for atomic_long_t there's very few, I'd leave them be for now,
> 
> > Ok, I have started a list on them to keep track, but we need to do
> > them also. There is no reason for them not to be refcounts, since so
> > far the ones I see are classical refcounts.
> 
> Well, if you get the tools (cocci script or whatever) to reliably work
> for atomic_t, then converting the few atomic_long_t's later should be
> trivial.

I am using coccinelle to find all occurrences, but I do the changes only in 
semi-automated fashion.
Each change needs a proper manual review anyway and often one variable usage is 
spread between different headers/source files,
so I prefer not to go to full automation and then not being sure what I have 
done. 


RE: [RFC][PATCH 7/7] kref: Implement using refcount_t

2016-11-18 Thread Reshetova, Elena

> On Fri, Nov 18, 2016 at 04:58:52PM +0000, Reshetova, Elena wrote:
> > > Could you please fix your mailer to not unwrap the emails?
> >
> > I wish I understand what you mean by "unwrap"... ?
> 
> Where I always have lines wrapped at 78 characters, but often when I see
> them back in your reply, they're unwrapped and go on forever.
> 
> For some reason your mailer reflows text and mucks with whitespace. I
> know Outlook likes to do this by default.

Ok, I think I managed to fix it. Hope it looks better now. 
 
> > On Fri, Nov 18, 2016 at 10:47:40AM +0000, Reshetova, Elena wrote:
> 
> > > Oh, and if we define refcount_t to be just atomic_t underneath, what
> > > about the other atomic_long_t, local_t and atomic64_t cases when it is
> > > used for refcounting?  I don't feel good just simply changing them to
> > > become atomic_t under refcount_t wrapper.
> >
> > > Is there anybody using local_t ? That seems 'creative' and highly
> questionable.
> > I am not yet sure about refcounts, but local_t itself is used in couple of 
> > places.
> 
> Sure, there's local_t usage, but I'd be very surprised if there's a
> single refcount usage among them.
> 
> > >As for atomic_long_t there's very few, I'd leave them be for now,
> 
> > Ok, I have started a list on them to keep track, but we need to do
> > them also. There is no reason for them not to be refcounts, since so
> > far the ones I see are classical refcounts.
> 
> Well, if you get the tools (cocci script or whatever) to reliably work
> for atomic_t, then converting the few atomic_long_t's later should be
> trivial.

I am using coccinelle to find all occurrences, but I do the changes only in 
semi-automated fashion.
Each change needs a proper manual review anyway and often one variable usage is 
spread between different headers/source files,
so I prefer not to go to full automation and then not being sure what I have 
done. 


[PATCH v6] USB hub_probe: rework ugly goto-into-compound-statement

2016-11-18 Thread Eugene Korenevsky
Rework smelling code (goto inside compound statement). Perhaps this is
legacy. Anyway such code is not appropriate for Linux kernel.

Signed-off-by: Eugene Korenevsky 
---
Changes in v6: more pedantic conversion from `int` to `bool`; fix comment
Changes in v5: make `bool` a return type of `hub_check_descriptor_sanity()`
Changes in v4: fix typo
Changes in v3: extract the code to static function
Changes in v2: fix spaces instead of tab, add missing 'Signed-off-by'

 drivers/usb/core/hub.c | 38 +++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index cbb1467..dbebfe4 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1722,10 +1722,28 @@ static void hub_disconnect(struct usb_interface *intf)
kref_put(&hub->kref, hub_release);
 }
 
+static bool hub_check_descriptor_sanity(struct usb_host_interface *desc)
+{
+   /* Some hubs have a subclass of 1, which AFAICT according to the */
+   /*  specs is not defined, but it works */
+   if (desc->desc.bInterfaceSubClass != 0 &&
+   desc->desc.bInterfaceSubClass != 1)
+   return false;
+
+   /* Multiple endpoints? What kind of mutant ninja-hub is this? */
+   if (desc->desc.bNumEndpoints != 1)
+   return false;
+
+   /* If the first endpoint is not interrupt IN, we'd better punt! */
+   if (!usb_endpoint_is_int_in(&desc->endpoint[0].desc))
+   return false;
+
+return true;
+}
+
 static int hub_probe(struct usb_interface *intf, const struct usb_device_id 
*id)
 {
struct usb_host_interface *desc;
-   struct usb_endpoint_descriptor *endpoint;
struct usb_device *hdev;
struct usb_hub *hub;
 
@@ -1800,25 +1818,11 @@ static int hub_probe(struct usb_interface *intf, const 
struct usb_device_id *id)
}
 #endif
 
-   /* Some hubs have a subclass of 1, which AFAICT according to the */
-   /*  specs is not defined, but it works */
-   if ((desc->desc.bInterfaceSubClass != 0) &&
-   (desc->desc.bInterfaceSubClass != 1)) {
-descriptor_error:
+   if (!hub_check_descriptor_sanity(desc)) {
dev_err(&intf->dev, "bad descriptor, ignoring hub\n");
return -EIO;
}
 
-   /* Multiple endpoints? What kind of mutant ninja-hub is this? */
-   if (desc->desc.bNumEndpoints != 1)
-   goto descriptor_error;
-
-   endpoint = &desc->endpoint[0].desc;
-
-   /* If it's not an interrupt in endpoint, we'd better punt! */
-   if (!usb_endpoint_is_int_in(endpoint))
-   goto descriptor_error;
-
/* We found a hub */
dev_info(&intf->dev, "USB hub found\n");
 
@@ -1845,7 +1849,7 @@ static int hub_probe(struct usb_interface *intf, const 
struct usb_device_id *id)
if (id->driver_info & HUB_QUIRK_CHECK_PORT_AUTOSUSPEND)
hub->quirk_check_port_auto_suspend = 1;
 
-   if (hub_configure(hub, endpoint) >= 0)
+   if (hub_configure(hub, >endpoint[0].desc) >= 0)
return 0;
 
hub_disconnect(intf);
-- 
2.10.2




[PATCH v6] USB hub_probe: rework ugly goto-into-compound-statement

2016-11-18 Thread Eugene Korenevsky
Rework smelly code (a goto into a compound statement). Perhaps this is
legacy. Either way, such code is not appropriate for the Linux kernel.

Signed-off-by: Eugene Korenevsky 
---
Changes in v6: more pedantic conversion from `int` to `bool`; fix comment
Changes in v5: make `bool` a return type of `hub_check_descriptor_sanity()`
Changes in v4: fix typo
Changes in v3: extract the code to static function
Changes in v2: fix spaces instead of tab, add missing 'Signed-off-by'

 drivers/usb/core/hub.c | 38 +-
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index cbb1467..dbebfe4 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -1722,10 +1722,28 @@ static void hub_disconnect(struct usb_interface *intf)
kref_put(>kref, hub_release);
 }
 
+static bool hub_check_descriptor_sanity(struct usb_host_interface *desc)
+{
+   /* Some hubs have a subclass of 1, which AFAICT according to the */
+   /*  specs is not defined, but it works */
+   if (desc->desc.bInterfaceSubClass != 0 &&
+   desc->desc.bInterfaceSubClass != 1)
+   return false;
+
+   /* Multiple endpoints? What kind of mutant ninja-hub is this? */
+   if (desc->desc.bNumEndpoints != 1)
+   return false;
+
+   /* If the first endpoint is not interrupt IN, we'd better punt! */
+   if (!usb_endpoint_is_int_in(>endpoint[0].desc))
+   return false;
+
+return true;
+}
+
 static int hub_probe(struct usb_interface *intf, const struct usb_device_id 
*id)
 {
struct usb_host_interface *desc;
-   struct usb_endpoint_descriptor *endpoint;
struct usb_device *hdev;
struct usb_hub *hub;
 
@@ -1800,25 +1818,11 @@ static int hub_probe(struct usb_interface *intf, const 
struct usb_device_id *id)
}
 #endif
 
-   /* Some hubs have a subclass of 1, which AFAICT according to the */
-   /*  specs is not defined, but it works */
-   if ((desc->desc.bInterfaceSubClass != 0) &&
-   (desc->desc.bInterfaceSubClass != 1)) {
-descriptor_error:
+   if (!hub_check_descriptor_sanity(desc)) {
dev_err(>dev, "bad descriptor, ignoring hub\n");
return -EIO;
}
 
-   /* Multiple endpoints? What kind of mutant ninja-hub is this? */
-   if (desc->desc.bNumEndpoints != 1)
-   goto descriptor_error;
-
-   endpoint = >endpoint[0].desc;
-
-   /* If it's not an interrupt in endpoint, we'd better punt! */
-   if (!usb_endpoint_is_int_in(endpoint))
-   goto descriptor_error;
-
/* We found a hub */
dev_info(>dev, "USB hub found\n");
 
@@ -1845,7 +1849,7 @@ static int hub_probe(struct usb_interface *intf, const 
struct usb_device_id *id)
if (id->driver_info & HUB_QUIRK_CHECK_PORT_AUTOSUSPEND)
hub->quirk_check_port_auto_suspend = 1;
 
-   if (hub_configure(hub, endpoint) >= 0)
+   if (hub_configure(hub, >endpoint[0].desc) >= 0)
return 0;
 
hub_disconnect(intf);
-- 
2.10.2




[PATCH] ARM: dts: msm8916: Add and enable wcnss node

2016-11-18 Thread Bjorn Andersson
Add the wcnss remoteproc node, with the SMD edge and the wcnss ctrl, bluetooth
and wifi nodes specified, and enable it on db410c.

Signed-off-by: Bjorn Andersson 
---

This still requires the last wcn36xx and scm-interrupted patches to land, but as
those won't affect the dts I'm posting this anyway.

 arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi  |  4 ++
 arch/arm64/boot/dts/qcom/msm8916-pins.dtsi | 13 ++
 arch/arm64/boot/dts/qcom/msm8916.dtsi  | 73 +-
 3 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi 
b/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi
index 08bd5ebafb4e..716d3ccbc309 100644
--- a/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi
+++ b/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi
@@ -306,6 +306,10 @@
 };
 };
 };
+
+   wcnss@a21b000 {
+   status = "okay";
+   };
};
 
usb2513 {
diff --git a/arch/arm64/boot/dts/qcom/msm8916-pins.dtsi 
b/arch/arm64/boot/dts/qcom/msm8916-pins.dtsi
index 10c83e11c272..4cb0b5834143 100644
--- a/arch/arm64/boot/dts/qcom/msm8916-pins.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8916-pins.dtsi
@@ -720,4 +720,17 @@
};
};
};
+
+   wcnss_pin_a: wcnss-active {
+   pinmux {
+   pins = "gpio40", "gpio41", "gpio42", "gpio43", "gpio44";
+   function = "wcss_wlan";
+   };
+
+   pinconf {
+   pins = "gpio40", "gpio41", "gpio42", "gpio43", "gpio44";
+   drive-strength = <6>;
+   bias-pull-up;
+   };
+   };
 };
diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi 
b/arch/arm64/boot/dts/qcom/msm8916.dtsi
index 4221b7d2c0ce..2c692650ae43 100644
--- a/arch/arm64/boot/dts/qcom/msm8916.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 
 / {
model = "Qualcomm Technologies, Inc. MSM8916";
@@ -82,7 +83,7 @@
no-map;
};
 
-   wcnss@8930 {
+   wcnss_mem: wcnss@8930 {
reg = <0x0 0x8930 0x0 0x60>;
no-map;
};
@@ -853,6 +854,76 @@
memory-region = <_mem>;
};
};
+
+   pronto: wcnss@a21b000 {
+   compatible = "qcom,pronto-v2-pil", "qcom,pronto";
+   reg = <0x0a204000 0x2000>, <0x0a202000 0x1000>, 
<0x0a21b000 0x3000>;
+   reg-names = "ccu", "dxe", "pmu";
+
+   memory-region = <_mem>;
+
+   interrupts-extended = < 0 149 
IRQ_TYPE_EDGE_RISING>,
+ <_smp2p_in 0 
IRQ_TYPE_EDGE_RISING>,
+ <_smp2p_in 1 
IRQ_TYPE_EDGE_RISING>,
+ <_smp2p_in 2 
IRQ_TYPE_EDGE_RISING>,
+ <_smp2p_in 3 
IRQ_TYPE_EDGE_RISING>;
+   interrupt-names = "wdog", "fatal", "ready", "handover", 
"stop-ack";
+
+   vddmx-supply = <_l3>;
+   vddpx-supply = <_l7>;
+
+   qcom,state = <_smp2p_out 0>;
+   qcom,state-names = "stop";
+
+   pinctrl-names = "default";
+   pinctrl-0 = <_pin_a>;
+
+   status = "disabled";
+
+   iris {
+   compatible = "qcom,wcn3620";
+
+   clocks = < RPM_SMD_RF_CLK2>;
+   clock-names = "xo";
+
+   vddxo-supply = <_l7>;
+   vddrfa-supply = <_s3>;
+   vddpa-supply = <_l9>;
+   vdddig-supply = <_l5>;
+   };
+
+   smd-edge {
+   interrupts = <0 142 1>;
+
+   qcom,ipc = < 8 17>;
+   qcom,smd-edge = <6>;
+   qcom,remote-pid = <4>;
+
+   label = "pronto";
+
+   wcnss {
+   compatible = "qcom,wcnss";
+   qcom,smd-channels = "WCNSS_CTRL";
+
+   qcom,mmio = <>;
+
+   bt {
+   compatible = "qcom,wcnss-bt";
+   };
+
+   wifi {
+   compatible = "qcom,wcnss-wlan";
+
+   

[PATCH] ARM: dts: msm8916: Add and enable wcnss node

2016-11-18 Thread Bjorn Andersson
Add the wcnss remoteproc node, with the SMD edge and the wcnss ctrl, bluetooth
and wifi nodes specified, and enable it on db410c.

Signed-off-by: Bjorn Andersson 
---

This still requires the last wcn36xx and scm-interrupted patches to land, but as
those won't affect the dts I'm posting this anyway.

 arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi  |  4 ++
 arch/arm64/boot/dts/qcom/msm8916-pins.dtsi | 13 ++
 arch/arm64/boot/dts/qcom/msm8916.dtsi  | 73 +-
 3 files changed, 89 insertions(+), 1 deletion(-)

diff --git a/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi 
b/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi
index 08bd5ebafb4e..716d3ccbc309 100644
--- a/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi
+++ b/arch/arm64/boot/dts/qcom/apq8016-sbc.dtsi
@@ -306,6 +306,10 @@
 };
 };
 };
+
+   wcnss@a21b000 {
+   status = "okay";
+   };
};
 
usb2513 {
diff --git a/arch/arm64/boot/dts/qcom/msm8916-pins.dtsi 
b/arch/arm64/boot/dts/qcom/msm8916-pins.dtsi
index 10c83e11c272..4cb0b5834143 100644
--- a/arch/arm64/boot/dts/qcom/msm8916-pins.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8916-pins.dtsi
@@ -720,4 +720,17 @@
};
};
};
+
+   wcnss_pin_a: wcnss-active {
+   pinmux {
+   pins = "gpio40", "gpio41", "gpio42", "gpio43", "gpio44";
+   function = "wcss_wlan";
+   };
+
+   pinconf {
+   pins = "gpio40", "gpio41", "gpio42", "gpio43", "gpio44";
+   drive-strength = <6>;
+   bias-pull-up;
+   };
+   };
 };
diff --git a/arch/arm64/boot/dts/qcom/msm8916.dtsi 
b/arch/arm64/boot/dts/qcom/msm8916.dtsi
index 4221b7d2c0ce..2c692650ae43 100644
--- a/arch/arm64/boot/dts/qcom/msm8916.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8916.dtsi
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 
 / {
model = "Qualcomm Technologies, Inc. MSM8916";
@@ -82,7 +83,7 @@
no-map;
};
 
-   wcnss@8930 {
+   wcnss_mem: wcnss@8930 {
reg = <0x0 0x8930 0x0 0x60>;
no-map;
};
@@ -853,6 +854,76 @@
memory-region = <_mem>;
};
};
+
+   pronto: wcnss@a21b000 {
+   compatible = "qcom,pronto-v2-pil", "qcom,pronto";
+   reg = <0x0a204000 0x2000>, <0x0a202000 0x1000>, 
<0x0a21b000 0x3000>;
+   reg-names = "ccu", "dxe", "pmu";
+
+   memory-region = <_mem>;
+
+   interrupts-extended = < 0 149 
IRQ_TYPE_EDGE_RISING>,
+ <_smp2p_in 0 
IRQ_TYPE_EDGE_RISING>,
+ <_smp2p_in 1 
IRQ_TYPE_EDGE_RISING>,
+ <_smp2p_in 2 
IRQ_TYPE_EDGE_RISING>,
+ <_smp2p_in 3 
IRQ_TYPE_EDGE_RISING>;
+   interrupt-names = "wdog", "fatal", "ready", "handover", 
"stop-ack";
+
+   vddmx-supply = <_l3>;
+   vddpx-supply = <_l7>;
+
+   qcom,state = <_smp2p_out 0>;
+   qcom,state-names = "stop";
+
+   pinctrl-names = "default";
+   pinctrl-0 = <_pin_a>;
+
+   status = "disabled";
+
+   iris {
+   compatible = "qcom,wcn3620";
+
+   clocks = < RPM_SMD_RF_CLK2>;
+   clock-names = "xo";
+
+   vddxo-supply = <_l7>;
+   vddrfa-supply = <_s3>;
+   vddpa-supply = <_l9>;
+   vdddig-supply = <_l5>;
+   };
+
+   smd-edge {
+   interrupts = <0 142 1>;
+
+   qcom,ipc = < 8 17>;
+   qcom,smd-edge = <6>;
+   qcom,remote-pid = <4>;
+
+   label = "pronto";
+
+   wcnss {
+   compatible = "qcom,wcnss";
+   qcom,smd-channels = "WCNSS_CTRL";
+
+   qcom,mmio = <>;
+
+   bt {
+   compatible = "qcom,wcnss-bt";
+   };
+
+   wifi {
+   compatible = "qcom,wcnss-wlan";
+
+   

Re: [PATCH v5] USB hub_probe: rework ugly goto-into-compound-statement

2016-11-18 Thread Eugene Korenevsky
> Ok, I'm going to be really pedantic here and ask that you spell this
> last statement out:
>if (usb...)
> return true;
>return false;


> 
> Also, the comment should say:
>   /* If the first endpoint is not interrupt IN, we... */
> 

It's better to invert the condition and return false:

```
if (!usb...)
return false;
return true;
```

This is exactly what is said in the comment ("If the first endpoint...
we'd better punt!"). And does not break the composition of the entire
function (all `if` bodies return false, last statement is `return
true`).

-- 
Eugene



Re: [PATCH v5] USB hub_probe: rework ugly goto-into-compound-statement

2016-11-18 Thread Eugene Korenevsky
> Ok, I'm going to be really pedantic here and ask that you spell this
> last statement out:
>if (usb...)
> return true;
>return false;


> 
> Also, the comment should say:
>   /* If the first endpoint is not interrupt IN, we... */
> 

It's better to invert the condition and return false:

```
if (!usb...)
return false;
return true;
```

This is exactly what is said in the comment ("If the first endpoint...
we'd better punt!"). And does not break the composition of the entire
function (all `if` bodies return false, last statement is `return
true`).

-- 
Eugene



[PATCH v2 0/2] dmaengine: core/omap-dma: Support for port window

2016-11-18 Thread Peter Ujfalusi
Hi,

Changes since v1:
- Make sure that the one frame covers the port_window (burst = port_window)
- added comment to explain the double indexed setup to cover the port_window
- Simplifications for the code mentioned by Russell and Vinod


Cover letter from v1:

as I'm trying to convert the remaining OMAP drivers to use DMAengine instead of
the legacy omap-dma API, I have come across the
drivers/usb/musb/tusb6010_omap.c driver.

The TUSB6010 is connected via the NOR FLASH interface and its register space is
mapped in the GPMC memory area. In OMAP SoCs we have support for external DMA
request lines and the TUSB6010 is using those as well.

With asynchronous access the DMA needs to read/write within the FIFO 'window' in
incremental address mode to read/write data.
The constant addressing only works in synchronous mode.

Since the DMA is driven by external DMA requests, the asynchronous mode is also
slave DMA operation, but currently the port window can not be 'swiped' as the
DMAengine only supports single register/address on the slave side.

This series will add support in dma_slave_config to specify the port side window
size and the second patch implements the setup needs in omap-dma driver for such
a transfer.

Regards,
Peter
---
Peter Ujfalusi (2):
  dmaengine: dma_slave_config: add support for slave port window
  dmaengine: omap-dma: Support for slave devices with data port window

 drivers/dma/omap-dma.c| 61 +--
 include/linux/dmaengine.h |  8 +++
 2 files changed, 67 insertions(+), 2 deletions(-)

-- 
2.10.2



[PATCH v2 0/2] dmaengine: core/omap-dma: Support for port window

2016-11-18 Thread Peter Ujfalusi
Hi,

Changes since v1:
- Make sure that the one frame covers the port_window (burst = port_window)
- added comment to explain the double indexed setup to cover the port_window
- Simplifications for the code mentioned by Russell and Vinod


Cover letter from v1:

as I'm trying to convert the remaining OMAP drivers to use DMAengine instead of
the legacy omap-dma API, I have come across the
drivers/usb/musb/tusb6010_omap.c driver.

The TUSB6010 is connected via the NOR FLASH interface and its register space is
mapped in the GPMC memory area. In OMAP SoCs we have support for external DMA
request lines and the TUSB6010 is using those as well.

With asynchronous access the DMA needs to read/write within the FIFO 'window' in
incremental address mode to read/write data.
The constant addressing only works in synchronous mode.

Since the DMA is driven by external DMA requests, the asynchronous mode is also
slave DMA operation, but currently the port window can not be 'swiped' as the
DMAengine only supports single register/address on the slave side.

This series will add support in dma_slave_config to specify the port side window
size and the second patch implements the setup needs in omap-dma driver for such
a transfer.

Regards,
Peter
---
Peter Ujfalusi (2):
  dmaengine: dma_slave_config: add support for slave port window
  dmaengine: omap-dma: Support for slave devices with data port window

 drivers/dma/omap-dma.c| 61 +--
 include/linux/dmaengine.h |  8 +++
 2 files changed, 67 insertions(+), 2 deletions(-)

-- 
2.10.2



Re: [PATCH 1/3] thermal: handle get_temp() errors properly

2016-11-18 Thread Brian Norris
Hi,

On Fri, Nov 18, 2016 at 07:41:59PM -0800, Eduardo Valentin wrote:
> On Fri, Nov 18, 2016 at 03:52:55PM -0800, Brian Norris wrote:
> > If using CONFIG_THERMAL_EMULATION, there's a corner case where we might
> > get an error from the zone's get_temp() callback, but we'll ignore that
> > and keep using its value. Let's just error out properly instead.
> > 
> > Signed-off-by: Brian Norris 
> > ---
> >  drivers/thermal/thermal_core.c | 3 +++
> >  1 file changed, 3 insertions(+)
> > 
> > diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> > index 911fd964c742..0fa497f10d25 100644
> > --- a/drivers/thermal/thermal_core.c
> > +++ b/drivers/thermal/thermal_core.c
> > @@ -494,6 +494,8 @@ int thermal_zone_get_temp(struct thermal_zone_device 
> > *tz, int *temp)
> > mutex_lock(>lock);
> >  
> > ret = tz->ops->get_temp(tz, temp);
> > +   if (ret)
> > +   goto exit_unlock;
> 
> Yeah, but the fall through is intentional, if I am not mistaken.

OK...but it has a bug. It potentially utilizes an uninitialized value
for *temp.

> >  
> > if (IS_ENABLED(CONFIG_THERMAL_EMULATION) && tz->emul_temperature) {
> 
> Even if the driver is not able to read real temperature, but emul temp
> is configured, then there is still opportunity to report the emulated
> temperature.

OK, maybe, but you should avoid doing this comparison then:

513 if (!ret && *temp < crit_temp)
514 *temp = tz->emul_temperature;

Note that 'ret' might be 0 (from the calls to ->get_trip_type()), and then
you're comparing with the uninitialized value of *temp. So you need some
solution that accounts for this and decides to ignore the real
temperature properly.

> > for (count = 0; count < tz->trips; count++) {
> > @@ -514,6 +516,7 @@ int thermal_zone_get_temp(struct thermal_zone_device 
> > *tz, int *temp)
> > *temp = tz->emul_temperature;
> 
> And if you check the lines at the bottom of the loop, you will see that,
> in the fail case, we will still compare to what is the content of temp,
> which might be problematic.

Yes...are you saying the same thing I am above?

> I would prefer we consider the patch I sent
> some time ago:
> https://patchwork.kernel.org/patch/7876381/

Honestly I didn't look that deeply into the framework here (and I also
don't use CONFIG_THERMAL_EMULATION), I was just fixing something that
was obviously wrong.

But on first read, that patch looks good to me -- although it'd be good
to note the uninitialized value fix in the commit log. Any reason that
didn't end up getting merged? It looks like it got reviewed, and you're
a thermal subsystem maintainer...

Brian


Re: [PATCH 1/3] thermal: handle get_temp() errors properly

2016-11-18 Thread Brian Norris
Hi,

On Fri, Nov 18, 2016 at 07:41:59PM -0800, Eduardo Valentin wrote:
> On Fri, Nov 18, 2016 at 03:52:55PM -0800, Brian Norris wrote:
> > If using CONFIG_THERMAL_EMULATION, there's a corner case where we might
> > get an error from the zone's get_temp() callback, but we'll ignore that
> > and keep using its value. Let's just error out properly instead.
> > 
> > Signed-off-by: Brian Norris 
> > ---
> >  drivers/thermal/thermal_core.c | 3 +++
> >  1 file changed, 3 insertions(+)
> > 
> > diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> > index 911fd964c742..0fa497f10d25 100644
> > --- a/drivers/thermal/thermal_core.c
> > +++ b/drivers/thermal/thermal_core.c
> > @@ -494,6 +494,8 @@ int thermal_zone_get_temp(struct thermal_zone_device 
> > *tz, int *temp)
> > mutex_lock(>lock);
> >  
> > ret = tz->ops->get_temp(tz, temp);
> > +   if (ret)
> > +   goto exit_unlock;
> 
> Yeah, but the fall through is intentional, if I am not mistaken.

OK...but it has a bug. It potentially utilizes an uninitialized value
for *temp.

> >  
> > if (IS_ENABLED(CONFIG_THERMAL_EMULATION) && tz->emul_temperature) {
> 
> Even if the driver is not able to read real temperature, but emul temp
> is configured, then there is still opportunity to report the emulated
> temperature.

OK, maybe, but you should avoid doing this comparison then:

513 if (!ret && *temp < crit_temp)
514 *temp = tz->emul_temperature;

Note that 'ret' might be 0 (from the calls to ->get_trip_type()), and then
you're comparing with the uninitialized value of *temp. So you need some
solution that accounts for this and decides to ignore the real
temperature properly.

> > for (count = 0; count < tz->trips; count++) {
> > @@ -514,6 +516,7 @@ int thermal_zone_get_temp(struct thermal_zone_device 
> > *tz, int *temp)
> > *temp = tz->emul_temperature;
> 
> And if you check the lines at the bottom of the loop, you will see that,
> in the fail case, we will still compare to what is the content of temp,
> which might be problematic.

Yes...are you saying the same thing I am above?

> I would prefer we consider the patch I sent
> some time ago:
> https://patchwork.kernel.org/patch/7876381/

Honestly I didn't look that deeply into the framework here (and I also
don't use CONFIG_THERMAL_EMULATION), I was just fixing something that
was obviously wrong.

But on first read, that patch looks good to me -- although it'd be good
to note the uninitialized value fix in the commit log. Any reason that
didn't end up getting merged? It looks like it got reviewed, and you're
a thermal subsystem maintainer...

Brian


[PATCH] timekeeping: Change type of nsec variable to unsigned in its calculation.

2016-11-18 Thread John Stultz
From: Liav Rehana 

During the calculation of the nsec variable in the inline function
timekeeping_delta_to_ns, it may undergo a sign extension if its msb
is set just before the shift. The sign extension may, in some cases,
gain it a value near the maximum value of the 64-bit range. This is
bad when it is later used in a division function, such as
__iter_div_u64_rem, where the amount of loops it will go through to
calculate the division will be too large. One can encounter such a
problem, for example, when trying to connect through ftp from an
outside host to the operating system. When the OS is too overloaded,
delta will get a high enough value for the msb of the sum
delta * tkr->mult + tkr->xtime_nsec to be set, and so after the
shift the nsec variable will gain a value similar to
0xff00. Using a variable with such a value in the
inline function __iter_div_u64_rem will take too long, making the
ftp connection attempt seem to get stuck.
The following commit fixes that chance of sign extension, while
maintaining the type of the nsec variable as signed for other
functions that use this variable, for possible legit negative
time intervals.

Cc: Chris Metcalf 
Cc: Thomas Gleixner 
Cc: Richard Cochran 
Cc: Ingo Molnar 
Cc: Prarit Bhargava 
Cc: Laurent Vivier 
Cc: David Gibson 
Cc: "Christopher S . Hall" 
Cc: sta...@vger.kernel.org  (4.6+)
Fixes: 6bd58f09e1d8 ("time: Add cycles to nanoseconds translation")
Also-Reported-by: Chris Metcalf 
Signed-off-by: Liav Rehana 
Signed-off-by: John Stultz 
---
Thomas/Ingo: This is for tip:timers/urgent.

 kernel/time/timekeeping.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 37dec7e..46e312e 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -299,10 +299,10 @@ u32 (*arch_gettimeoffset)(void) = 
default_arch_gettimeoffset;
 static inline u32 arch_gettimeoffset(void) { return 0; }
 #endif
 
-static inline s64 timekeeping_delta_to_ns(struct tk_read_base *tkr,
+static inline u64 timekeeping_delta_to_ns(struct tk_read_base *tkr,
  cycle_t delta)
 {
-   s64 nsec;
+   u64 nsec;
 
nsec = delta * tkr->mult + tkr->xtime_nsec;
nsec >>= tkr->shift;
-- 
2.7.4



[PATCH] timekeeping: Change type of nsec variable to unsigned in its calculation.

2016-11-18 Thread John Stultz
From: Liav Rehana 

During the calculation of the nsec variable in the inline function
timekeeping_delta_to_ns, it may undergo a sign extension if its msb
is set just before the shift. The sign extension may, in some cases,
gain it a value near the maximum value of the 64-bit range. This is
bad when it is later used in a division function, such as
__iter_div_u64_rem, where the amount of loops it will go through to
calculate the division will be too large. One can encounter such a
problem, for example, when trying to connect through ftp from an
outside host to the operating system. When the OS is too overloaded,
delta will get a high enough value for the msb of the sum
delta * tkr->mult + tkr->xtime_nsec to be set, and so after the
shift the nsec variable will gain a value similar to
0xff00. Using a variable with such a value in the
inline function __iter_div_u64_rem will take too long, making the
ftp connection attempt seem to get stuck.
The following commit fixes that chance of sign extension, while
maintaining the type of the nsec variable as signed for other
functions that use this variable, for possible legit negative
time intervals.

Cc: Chris Metcalf 
Cc: Thomas Gleixner 
Cc: Richard Cochran 
Cc: Ingo Molnar 
Cc: Prarit Bhargava 
Cc: Laurent Vivier 
Cc: David Gibson 
Cc: "Christopher S . Hall" 
Cc: sta...@vger.kernel.org  (4.6+)
Fixes: 6bd58f09e1d8 ("time: Add cycles to nanoseconds translation")
Also-Reported-by: Chris Metcalf 
Signed-off-by: Liav Rehana 
Signed-off-by: John Stultz 
---
Thomas/Ingo: This is for tip:timers/urgent.

 kernel/time/timekeeping.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 37dec7e..46e312e 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -299,10 +299,10 @@ u32 (*arch_gettimeoffset)(void) = 
default_arch_gettimeoffset;
 static inline u32 arch_gettimeoffset(void) { return 0; }
 #endif
 
-static inline s64 timekeeping_delta_to_ns(struct tk_read_base *tkr,
+static inline u64 timekeeping_delta_to_ns(struct tk_read_base *tkr,
  cycle_t delta)
 {
-   s64 nsec;
+   u64 nsec;
 
nsec = delta * tkr->mult + tkr->xtime_nsec;
nsec >>= tkr->shift;
-- 
2.7.4



[PATCH 2/4] selftests/timers: Fix spelling mistake "Asyncrhonous" -> "Asynchronous"

2016-11-18 Thread John Stultz
From: Colin Ian King 

Trivial fix to spelling mistake

Cc: Thomas Gleixner 
Cc: Richard Cochran 
Cc: Ingo Molnar 
Cc: Shuah Khan 
Cc: Prarit Bhargava 
Signed-off-by: Colin Ian King 
Signed-off-by: John Stultz 
---
 tools/testing/selftests/timers/skew_consistency.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/timers/skew_consistency.c 
b/tools/testing/selftests/timers/skew_consistency.c
index 5562f84..2a996e0 100644
--- a/tools/testing/selftests/timers/skew_consistency.c
+++ b/tools/testing/selftests/timers/skew_consistency.c
@@ -57,7 +57,7 @@ int main(int argv, char **argc)
pid_t pid;
 
 
-   printf("Running Asyncrhonous Frequency Changing Tests...\n");
+   printf("Running Asynchronous Frequency Changing Tests...\n");
 
pid = fork();
if (!pid)
-- 
2.7.4



[PATCH 1/4] time: alarmtimer: Add the tracepoints for alarmtimer

2016-11-18 Thread John Stultz
From: Baolin Wang 

For system debugging, we sometimes want to know who sets one
alarm timer, the time of the timer, when the timer started and
fired and so on. Thus adding tracepoints can help us trace the
alarmtimer information.

For example, when we debug the system suspend/resume, if the
system is always resumed by RTC alarm, we can find out which
process set the alarm timer to resume system by below trace log:

..

Binder:3292_2-3304  [000] d..2   149.981123: alarmtimer_cancel:
alarmtimer:ffc1319a7800 type:REALTIME
expires:13254631200 now:1325376810370370245

Binder:3292_2-3304  [000] d..2   149.981136: alarmtimer_start:
alarmtimer:ffc1319a7800 type:REALTIME
expires:13253768400 now:1325376810370384591

Binder:3292_9-3953  [000] d..2   150.212991: alarmtimer_cancel:
alarmtimer:ffc1319a5a00 type:BOOTTIME
expires:17955200 now:150154008122

Binder:3292_9-3953  [000] d..2   150.213006: alarmtimer_start:
alarmtimer:ffc1319a5a00 type:BOOTTIME
expires:17955100 now:150154025622

..

system_server-3000  [002] ...1  162.701940: alarmtimer_suspend:
alarmtimer type:REALTIME expires:1325376839802714584

..

From the trace log, we can find out the 'Binder:3292_2' process
set one alarm timer which resumes the system.

Cc: Thomas Gleixner 
Cc: Richard Cochran 
Cc: Ingo Molnar 
Cc: Steven Rostedt 
Acked-by: Steven Rostedt 
Signed-off-by: Baolin Wang 
Signed-off-by: John Stultz 
---
 include/trace/events/alarmtimer.h | 92 +++
 kernel/time/alarmtimer.c  | 16 ++-
 2 files changed, 106 insertions(+), 2 deletions(-)
 create mode 100644 include/trace/events/alarmtimer.h

diff --git a/include/trace/events/alarmtimer.h 
b/include/trace/events/alarmtimer.h
new file mode 100644
index 000..61ea556
--- /dev/null
+++ b/include/trace/events/alarmtimer.h
@@ -0,0 +1,92 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM alarmtimer
+
+#if !defined(_TRACE_ALARMTIMER_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_ALARMTIMER_H
+
+#include 
+#include 
+#include 
+
+TRACE_DEFINE_ENUM(ALARM_REALTIME);
+TRACE_DEFINE_ENUM(ALARM_BOOTTIME);
+
+#define show_alarm_type(type)  __print_flags(type, " | ",  \
+   { 1 << ALARM_REALTIME, "REALTIME" },\
+   { 1 << ALARM_BOOTTIME, "BOOTTIME" })
+
+TRACE_EVENT(alarmtimer_suspend,
+
+   TP_PROTO(ktime_t expires, int flag),
+
+   TP_ARGS(expires, flag),
+
+   TP_STRUCT__entry(
+   __field(s64, expires)
+   __field(unsigned char, alarm_type)
+   ),
+
+   TP_fast_assign(
+   __entry->expires = expires.tv64;
+   __entry->alarm_type = flag;
+   ),
+
+   TP_printk("alarmtimer type:%s expires:%llu",
+ show_alarm_type((1 << __entry->alarm_type)),
+ __entry->expires
+   )
+);
+
+DECLARE_EVENT_CLASS(alarm_class,
+
+   TP_PROTO(struct alarm *alarm, ktime_t now),
+
+   TP_ARGS(alarm, now),
+
+   TP_STRUCT__entry(
+   __field(void *, alarm)
+   __field(unsigned char, alarm_type)
+   __field(s64, expires)
+   __field(s64, now)
+   ),
+
+   TP_fast_assign(
+   __entry->alarm = alarm;
+   __entry->alarm_type = alarm->type;
+   __entry->expires = alarm->node.expires.tv64;
+   __entry->now = now.tv64;
+   ),
+
+   TP_printk("alarmtimer:%p type:%s expires:%llu now:%llu",
+ __entry->alarm,
+ show_alarm_type((1 << __entry->alarm_type)),
+ __entry->expires,
+ __entry->now
+   )
+);
+
+DEFINE_EVENT(alarm_class, alarmtimer_fired,
+
+   TP_PROTO(struct alarm *alarm, ktime_t now),
+
+   TP_ARGS(alarm, now)
+);
+
+DEFINE_EVENT(alarm_class, alarmtimer_start,
+
+   TP_PROTO(struct alarm *alarm, ktime_t now),
+
+   TP_ARGS(alarm, now)
+);
+
+DEFINE_EVENT(alarm_class, alarmtimer_cancel,
+
+   TP_PROTO(struct alarm *alarm, ktime_t now),
+
+   TP_ARGS(alarm, now)
+);
+
+#endif /* _TRACE_ALARMTIMER_H */
+
+/* This part must be outside protection */
+#include 
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 12dd190..8084e0c 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -26,6 +26,9 @@
 #include 
 #include 
 
+#define CREATE_TRACE_POINTS
+#include 
+
 /**
  * struct alarm_base - Alarm timer bases
  * @lock:  Lock for syncrhonized access to the base
@@ -194,6 +197,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer 
*timer)
}
spin_unlock_irqrestore(>lock, flags);
 
+   trace_alarmtimer_fired(alarm, base->gettime());
return ret;
 
 }
@@ -222,7 +226,7 @@ static int alarmtimer_suspend(struct device *dev)

[PATCH 2/4] selftests/timers: Fix spelling mistake "Asyncrhonous" -> "Asynchronous"

2016-11-18 Thread John Stultz
From: Colin Ian King 

Trivial fix to spelling mistake

Cc: Thomas Gleixner 
Cc: Richard Cochran 
Cc: Ingo Molnar 
Cc: Shuah Khan 
Cc: Prarit Bhargava 
Signed-off-by: Colin Ian King 
Signed-off-by: John Stultz 
---
 tools/testing/selftests/timers/skew_consistency.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/timers/skew_consistency.c 
b/tools/testing/selftests/timers/skew_consistency.c
index 5562f84..2a996e0 100644
--- a/tools/testing/selftests/timers/skew_consistency.c
+++ b/tools/testing/selftests/timers/skew_consistency.c
@@ -57,7 +57,7 @@ int main(int argv, char **argc)
pid_t pid;
 
 
-   printf("Running Asyncrhonous Frequency Changing Tests...\n");
+   printf("Running Asynchronous Frequency Changing Tests...\n");
 
pid = fork();
if (!pid)
-- 
2.7.4



[PATCH 1/4] time: alarmtimer: Add the tracepoints for alarmtimer

2016-11-18 Thread John Stultz
From: Baolin Wang 

For system debugging, we sometimes want to know who sets one
alarm timer, the time of the timer, when the timer started and
fired and so on. Thus adding tracepoints can help us trace the
alarmtimer information.

For example, when we debug the system suspend/resume, if the
system is always resumed by RTC alarm, we can find out which
process set the alarm timer to resume system by below trace log:

..

Binder:3292_2-3304  [000] d..2   149.981123: alarmtimer_cancel:
alarmtimer:ffc1319a7800 type:REALTIME
expires:13254631200 now:1325376810370370245

Binder:3292_2-3304  [000] d..2   149.981136: alarmtimer_start:
alarmtimer:ffc1319a7800 type:REALTIME
expires:13253768400 now:1325376810370384591

Binder:3292_9-3953  [000] d..2   150.212991: alarmtimer_cancel:
alarmtimer:ffc1319a5a00 type:BOOTTIME
expires:17955200 now:150154008122

Binder:3292_9-3953  [000] d..2   150.213006: alarmtimer_start:
alarmtimer:ffc1319a5a00 type:BOOTTIME
expires:17955100 now:150154025622

..

system_server-3000  [002] ...1  162.701940: alarmtimer_suspend:
alarmtimer type:REALTIME expires:1325376839802714584

..

From the trace log, we can find out the 'Binder:3292_2' process
set one alarm timer which resumes the system.

Cc: Thomas Gleixner 
Cc: Richard Cochran 
Cc: Ingo Molnar 
Cc: Steven Rostedt 
Acked-by: Steven Rostedt 
Signed-off-by: Baolin Wang 
Signed-off-by: John Stultz 
---
 include/trace/events/alarmtimer.h | 92 +++
 kernel/time/alarmtimer.c  | 16 ++-
 2 files changed, 106 insertions(+), 2 deletions(-)
 create mode 100644 include/trace/events/alarmtimer.h

diff --git a/include/trace/events/alarmtimer.h 
b/include/trace/events/alarmtimer.h
new file mode 100644
index 000..61ea556
--- /dev/null
+++ b/include/trace/events/alarmtimer.h
@@ -0,0 +1,92 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM alarmtimer
+
+#if !defined(_TRACE_ALARMTIMER_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_ALARMTIMER_H
+
+#include 
+#include 
+#include 
+
+TRACE_DEFINE_ENUM(ALARM_REALTIME);
+TRACE_DEFINE_ENUM(ALARM_BOOTTIME);
+
+#define show_alarm_type(type)  __print_flags(type, " | ",  \
+   { 1 << ALARM_REALTIME, "REALTIME" },\
+   { 1 << ALARM_BOOTTIME, "BOOTTIME" })
+
+TRACE_EVENT(alarmtimer_suspend,
+
+   TP_PROTO(ktime_t expires, int flag),
+
+   TP_ARGS(expires, flag),
+
+   TP_STRUCT__entry(
+   __field(s64, expires)
+   __field(unsigned char, alarm_type)
+   ),
+
+   TP_fast_assign(
+   __entry->expires = expires.tv64;
+   __entry->alarm_type = flag;
+   ),
+
+   TP_printk("alarmtimer type:%s expires:%llu",
+ show_alarm_type((1 << __entry->alarm_type)),
+ __entry->expires
+   )
+);
+
+DECLARE_EVENT_CLASS(alarm_class,
+
+   TP_PROTO(struct alarm *alarm, ktime_t now),
+
+   TP_ARGS(alarm, now),
+
+   TP_STRUCT__entry(
+   __field(void *, alarm)
+   __field(unsigned char, alarm_type)
+   __field(s64, expires)
+   __field(s64, now)
+   ),
+
+   TP_fast_assign(
+   __entry->alarm = alarm;
+   __entry->alarm_type = alarm->type;
+   __entry->expires = alarm->node.expires.tv64;
+   __entry->now = now.tv64;
+   ),
+
+   TP_printk("alarmtimer:%p type:%s expires:%llu now:%llu",
+ __entry->alarm,
+ show_alarm_type((1 << __entry->alarm_type)),
+ __entry->expires,
+ __entry->now
+   )
+);
+
+DEFINE_EVENT(alarm_class, alarmtimer_fired,
+
+   TP_PROTO(struct alarm *alarm, ktime_t now),
+
+   TP_ARGS(alarm, now)
+);
+
+DEFINE_EVENT(alarm_class, alarmtimer_start,
+
+   TP_PROTO(struct alarm *alarm, ktime_t now),
+
+   TP_ARGS(alarm, now)
+);
+
+DEFINE_EVENT(alarm_class, alarmtimer_cancel,
+
+   TP_PROTO(struct alarm *alarm, ktime_t now),
+
+   TP_ARGS(alarm, now)
+);
+
+#endif /* _TRACE_ALARMTIMER_H */
+
+/* This part must be outside protection */
+#include 
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index 12dd190..8084e0c 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -26,6 +26,9 @@
 #include 
 #include 
 
+#define CREATE_TRACE_POINTS
+#include 
+
 /**
  * struct alarm_base - Alarm timer bases
  * @lock:  Lock for syncrhonized access to the base
@@ -194,6 +197,7 @@ static enum hrtimer_restart alarmtimer_fired(struct hrtimer 
*timer)
}
spin_unlock_irqrestore(>lock, flags);
 
+   trace_alarmtimer_fired(alarm, base->gettime());
return ret;
 
 }
@@ -222,7 +226,7 @@ static int alarmtimer_suspend(struct device *dev)
ktime_t min, now;
unsigned long flags;
struct rtc_device *rtc;
-   int i;
+   int i, type = 0;
int ret;
 
spin_lock_irqsave(_delta_lock, 

[PATCH 4/4] timekeeping: clocksource_cyc2ns: Document intended range limitation

2016-11-18 Thread John Stultz
From: Chris Metcalf 

The "cycles" argument should not be an absolute clocksource cycle
value, as the implementation's arithmetic will overflow relatively
easily with wide (64 bit) clocksource counters.

For performance, the implementation is simple and fast, since the
function is intended for only relatively small delta values of
clocksource cycles.

Cc: Richard Cochran 
Cc: Ingo Molnar 
Cc: Prarit Bhargava 
Cc: Thomas Gleixner 
Signed-off-by: Chris Metcalf 
[jstultz: Fixed up to merge against HEAD & commit message tweaks]
Signed-off-by: John Stultz 
---
 include/linux/clocksource.h | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 0839818..0881bca 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -169,7 +169,10 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 
shift_constant)
  * @mult:  cycle to nanosecond multiplier
  * @shift: cycle to nanosecond divisor (power of two)
  *
- * Converts cycles to nanoseconds, using the given mult and shift.
+ * Converts clocksource cycles to nanoseconds, using the given mult and shift.
+ * The code is optimized for performance and not intended to work
+ * with absolute clocksource cycles, as it will easily overflow,
+ * but just intended for relative (delta) clocksource cycles.
  *
  * XXX - This could use some mult_lxl_ll() asm optimization
  */
-- 
2.7.4



[PATCH 3/4] timekeeping: Ignore the bogus sleep time if pm_trace is enabled

2016-11-18 Thread John Stultz
From: Chen Yu 

Previously we encountered some memory overflow issues due to
the bogus sleep time brought by inconsistent rtc, which is
triggered when pm_trace is enabled, and we have fixed it
in recent kernel. However it's improper in the first place
to call __timekeeping_inject_sleeptime() in case that pm_trace
is enabled simply because that "hash" time value will wreck
the timekeeping subsystem.

This patch is originally written by Thomas, which would bypass
the bogus rtc interval when pm_trace is enabled.
Meanwhile, if the system succeeds in resuming with pm_trace set, the
users are warned to adjust the bogus rtc with either ntp-date or rdate.
pm_trace_rtc_abused is reset to false at that point, since otherwise
those tools might not work as expected.

Originally-from: Thomas Gleixner 
Cc: "Rafael J. Wysocki" 
Cc: John Stultz 
Cc: Xunlei Pang 
Cc: Ingo Molnar 
Cc: Len Brown 
Cc: "H. Peter Anvin" 
Cc: Pavel Machek 
Cc: Thomas Gleixner 
Cc: Prarit Bhargava 
Cc: Richard Cochran 
Acked-by: Pavel Machek 
Acked-by: Thomas Gleixner 
Signed-off-by: Chen Yu 
Signed-off-by: John Stultz 
---
 arch/x86/kernel/rtc.c   |  9 +
 drivers/base/power/trace.c  | 27 +++
 drivers/rtc/rtc-cmos.c  |  7 +++
 include/linux/mc146818rtc.h |  1 +
 include/linux/pm-trace.h|  9 -
 5 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 79c6311c..898383c 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -64,6 +64,15 @@ void mach_get_cmos_time(struct timespec *now)
unsigned int status, year, mon, day, hour, min, sec, century = 0;
unsigned long flags;
 
+   /*
+* If pm trace abused the RTC as storage set the timespec to 0
+* which tells the caller that this RTC value is bogus.
+*/
+   if (!pm_trace_rtc_valid()) {
+   now->tv_sec = now->tv_nsec = 0;
+   return;
+   }
+
spin_lock_irqsave(_lock, flags);
 
/*
diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c
index efec10b..209e214 100644
--- a/drivers/base/power/trace.c
+++ b/drivers/base/power/trace.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -74,6 +75,9 @@
 
 #define DEVSEED (7919)
 
+bool pm_trace_rtc_abused __read_mostly;
+EXPORT_SYMBOL(pm_trace_rtc_abused);
+
 static unsigned int dev_hash_value;
 
 static int set_magic_time(unsigned int user, unsigned int file, unsigned int 
device)
@@ -104,6 +108,7 @@ static int set_magic_time(unsigned int user, unsigned int 
file, unsigned int dev
time.tm_min = (n % 20) * 3;
n /= 20;
mc146818_set_time();
+   pm_trace_rtc_abused = true;
return n ? -1 : 0;
 }
 
@@ -238,10 +243,32 @@ int show_trace_dev_match(char *buf, size_t size)
device_pm_unlock();
return ret;
 }
+static int pm_trace_notify(struct notifier_block *nb,
+   unsigned long mode, void *_unused)
+{
+   switch (mode) {
+   case PM_POST_HIBERNATION:
+   case PM_POST_SUSPEND:
+   if (pm_trace_rtc_abused) {
+   pm_trace_rtc_abused = false;
+   pr_warn("Possible incorrect RTC due to pm_trace,"
+   "please use ntp-date or rdate to reset.\n");
+   }
+   break;
+   default:
+   break;
+   }
+   return 0;
+}
+
+static struct notifier_block pm_trace_nb = {
+   .notifier_call = pm_trace_notify,
+};
 
 static int early_resume_init(void)
 {
hash_value_early_read = read_magic_time();
+   register_pm_notifier(_trace_nb);
return 0;
 }
 
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index dd3d598..3d9aedc 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -191,6 +191,13 @@ static inline void cmos_write_bank2(unsigned char val, 
unsigned char addr)
 
 static int cmos_read_time(struct device *dev, struct rtc_time *t)
 {
+   /*
+* If pmtrace abused the RTC for storage tell the caller that it is
+* unusable.
+*/
+   if (!pm_trace_rtc_valid())
+   return -EIO;
+
/* REVISIT:  if the clock has a "century" register, use
 * that instead of the heuristic in mc146818_get_time().
 * That'll make Y3K compatility (year > 2070) easy!
diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h
index a585b4b..0661af1 100644
--- a/include/linux/mc146818rtc.h
+++ b/include/linux/mc146818rtc.h
@@ -16,6 +16,7 @@
 #include/* register access macros */
 #include 
 #include 
+#include 
 
 #ifdef __KERNEL__

[PATCH 4/4] timekeeping: clocksource_cyc2ns: Document intended range limitation

2016-11-18 Thread John Stultz
From: Chris Metcalf 

The "cycles" argument should not be an absolute clocksource cycle
value, as the implementation's arithmetic will overflow relatively
easily with wide (64 bit) clocksource counters.

For performance, the implementation is simple and fast, since the
function is intended for only relatively small delta values of
clocksource cycles.

Cc: Richard Cochran 
Cc: Ingo Molnar 
Cc: Prarit Bhargava 
Cc: Thomas Gleixner 
Signed-off-by: Chris Metcalf 
[jstultz: Fixed up to merge against HEAD & commit message tweaks]
Signed-off-by: John Stultz 
---
 include/linux/clocksource.h | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 0839818..0881bca 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -169,7 +169,10 @@ static inline u32 clocksource_hz2mult(u32 hz, u32 
shift_constant)
  * @mult:  cycle to nanosecond multiplier
  * @shift: cycle to nanosecond divisor (power of two)
  *
- * Converts cycles to nanoseconds, using the given mult and shift.
+ * Converts clocksource cycles to nanoseconds, using the given mult and shift.
+ * The code is optimized for performance and not intended to work
+ * with absolute clocksource cycles, as it will easily overflow,
+ * but just intended for relative (delta) clocksource cycles.
  *
  * XXX - This could use some mult_lxl_ll() asm optimization
  */
-- 
2.7.4



[PATCH 3/4] timekeeping: Ignore the bogus sleep time if pm_trace is enabled

2016-11-18 Thread John Stultz
From: Chen Yu 

Previously we encountered some memory overflow issues due to
the bogus sleep time brought by inconsistent rtc, which is
triggered when pm_trace is enabled, and we have fixed it
in recent kernel. However it's improper in the first place
to call __timekeeping_inject_sleeptime() in case that pm_trace
is enabled simply because that "hash" time value will wreck
the timekeeping subsystem.

This patch is originally written by Thomas, which would bypass
the bogus rtc interval when pm_trace is enabled.
Meanwhile, if the system succeeds in resuming with pm_trace set, the
users are warned to adjust the bogus rtc with either ntp-date or rdate.
pm_trace_rtc_abused is reset to false at that point, since otherwise
those tools might not work as expected.

Originally-from: Thomas Gleixner 
Cc: "Rafael J. Wysocki" 
Cc: John Stultz 
Cc: Xunlei Pang 
Cc: Ingo Molnar 
Cc: Len Brown 
Cc: "H. Peter Anvin" 
Cc: Pavel Machek 
Cc: Thomas Gleixner 
Cc: Prarit Bhargava 
Cc: Richard Cochran 
Acked-by: Pavel Machek 
Acked-by: Thomas Gleixner 
Signed-off-by: Chen Yu 
Signed-off-by: John Stultz 
---
 arch/x86/kernel/rtc.c   |  9 +
 drivers/base/power/trace.c  | 27 +++
 drivers/rtc/rtc-cmos.c  |  7 +++
 include/linux/mc146818rtc.h |  1 +
 include/linux/pm-trace.h|  9 -
 5 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c
index 79c6311c..898383c 100644
--- a/arch/x86/kernel/rtc.c
+++ b/arch/x86/kernel/rtc.c
@@ -64,6 +64,15 @@ void mach_get_cmos_time(struct timespec *now)
unsigned int status, year, mon, day, hour, min, sec, century = 0;
unsigned long flags;
 
+   /*
+* If pm trace abused the RTC as storage set the timespec to 0
+* which tells the caller that this RTC value is bogus.
+*/
+   if (!pm_trace_rtc_valid()) {
+   now->tv_sec = now->tv_nsec = 0;
+   return;
+   }
+
spin_lock_irqsave(_lock, flags);
 
/*
diff --git a/drivers/base/power/trace.c b/drivers/base/power/trace.c
index efec10b..209e214 100644
--- a/drivers/base/power/trace.c
+++ b/drivers/base/power/trace.c
@@ -10,6 +10,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -74,6 +75,9 @@
 
 #define DEVSEED (7919)
 
+bool pm_trace_rtc_abused __read_mostly;
+EXPORT_SYMBOL(pm_trace_rtc_abused);
+
 static unsigned int dev_hash_value;
 
 static int set_magic_time(unsigned int user, unsigned int file, unsigned int 
device)
@@ -104,6 +108,7 @@ static int set_magic_time(unsigned int user, unsigned int 
file, unsigned int dev
time.tm_min = (n % 20) * 3;
n /= 20;
mc146818_set_time();
+   pm_trace_rtc_abused = true;
return n ? -1 : 0;
 }
 
@@ -238,10 +243,32 @@ int show_trace_dev_match(char *buf, size_t size)
device_pm_unlock();
return ret;
 }
+static int pm_trace_notify(struct notifier_block *nb,
+   unsigned long mode, void *_unused)
+{
+   switch (mode) {
+   case PM_POST_HIBERNATION:
+   case PM_POST_SUSPEND:
+   if (pm_trace_rtc_abused) {
+   pm_trace_rtc_abused = false;
+   pr_warn("Possible incorrect RTC due to pm_trace,"
+   "please use ntp-date or rdate to reset.\n");
+   }
+   break;
+   default:
+   break;
+   }
+   return 0;
+}
+
+static struct notifier_block pm_trace_nb = {
+   .notifier_call = pm_trace_notify,
+};
 
 static int early_resume_init(void)
 {
hash_value_early_read = read_magic_time();
+   register_pm_notifier(_trace_nb);
return 0;
 }
 
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index dd3d598..3d9aedc 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -191,6 +191,13 @@ static inline void cmos_write_bank2(unsigned char val, 
unsigned char addr)
 
 static int cmos_read_time(struct device *dev, struct rtc_time *t)
 {
+   /*
+* If pmtrace abused the RTC for storage tell the caller that it is
+* unusable.
+*/
+   if (!pm_trace_rtc_valid())
+   return -EIO;
+
/* REVISIT:  if the clock has a "century" register, use
 * that instead of the heuristic in mc146818_get_time().
 * That'll make Y3K compatility (year > 2070) easy!
diff --git a/include/linux/mc146818rtc.h b/include/linux/mc146818rtc.h
index a585b4b..0661af1 100644
--- a/include/linux/mc146818rtc.h
+++ b/include/linux/mc146818rtc.h
@@ -16,6 +16,7 @@
 #include/* register access macros */
 #include 
 #include 
+#include 
 
 #ifdef __KERNEL__
 #include /* spinlock_t */
diff --git a/include/linux/pm-trace.h b/include/linux/pm-trace.h
index ecbde7a..7b78793 100644
--- a/include/linux/pm-trace.h
+++ b/include/linux/pm-trace.h
@@ -1,11 +1,17 @@
 #ifndef PM_TRACE_H
 #define PM_TRACE_H
 
+#include 
 #ifdef CONFIG_PM_TRACE
 #include 
-#include 

[GIT PULL][PATCH 0/4] Timekeeping items for 4.10

2016-11-18 Thread John Stultz
Hey Thomas, Ingo,
  Just a few small patches I have queued for 4.10.

Please let me know if you have any objections.

You can grab the patches via git pull as specified below.

thanks
-john

Cc: Richard Cochran 
Cc: Ingo Molnar 
Cc: Prarit Bhargava 
Cc: Thomas Gleixner 


The following changes since commit a909d3e636995ba7c349e2ca5dbb528154d4ac30:

  Linux 4.9-rc3 (2016-10-29 13:52:02 -0700)

are available in the git repository at:

  https://git.linaro.org/people/john.stultz/linux.git fortglx/4.10/time

for you to fetch changes up to 61aa18b038bf53be5b67ece7db86ad78be0716a4:

  timekeeping: clocksource_cyc2ns: Document intended range limitation 
(2016-11-18 20:39:49 -0800)


Baolin Wang (1):
  time: alarmtimer: Add the trcepoints for alarmtimer

Chen Yu (1):
  timekeeping: Ignore the bogus sleep time if pm_trace is enabled

Chris Metcalf (1):
  timekeeping: clocksource_cyc2ns: Document intended range limitation

Colin Ian King (1):
  selftests/timers: Fix spelling mistake "Asyncrhonous" ->
"Asynchronous"

 arch/x86/kernel/rtc.c |  9 +++
 drivers/base/power/trace.c| 27 +++
 drivers/rtc/rtc-cmos.c|  7 ++
 include/linux/clocksource.h   |  5 +-
 include/linux/mc146818rtc.h   |  1 +
 include/linux/pm-trace.h  |  9 ++-
 include/trace/events/alarmtimer.h | 92 +++
 kernel/time/alarmtimer.c  | 16 +++-
 tools/testing/selftests/timers/skew_consistency.c |  2 +-
 9 files changed, 163 insertions(+), 5 deletions(-)
 create mode 100644 include/trace/events/alarmtimer.h

-- 
2.7.4



[GIT PULL][PATCH 0/4] Timekeeping items for 4.10

2016-11-18 Thread John Stultz
Hey Thomas, Ingo,
  Just a few small patches I have queued for 4.10.

Please let me know if you have any objections.

You can grab the patches via git pull as specified below.

thanks
-john

Cc: Richard Cochran 
Cc: Ingo Molnar 
Cc: Prarit Bhargava 
Cc: Thomas Gleixner 


The following changes since commit a909d3e636995ba7c349e2ca5dbb528154d4ac30:

  Linux 4.9-rc3 (2016-10-29 13:52:02 -0700)

are available in the git repository at:

  https://git.linaro.org/people/john.stultz/linux.git fortglx/4.10/time

for you to fetch changes up to 61aa18b038bf53be5b67ece7db86ad78be0716a4:

  timekeeping: clocksource_cyc2ns: Document intended range limitation 
(2016-11-18 20:39:49 -0800)


Baolin Wang (1):
  time: alarmtimer: Add the trcepoints for alarmtimer

Chen Yu (1):
  timekeeping: Ignore the bogus sleep time if pm_trace is enabled

Chris Metcalf (1):
  timekeeping: clocksource_cyc2ns: Document intended range limitation

Colin Ian King (1):
  selftests/timers: Fix spelling mistake "Asyncrhonous" ->
"Asynchronous"

 arch/x86/kernel/rtc.c |  9 +++
 drivers/base/power/trace.c| 27 +++
 drivers/rtc/rtc-cmos.c|  7 ++
 include/linux/clocksource.h   |  5 +-
 include/linux/mc146818rtc.h   |  1 +
 include/linux/pm-trace.h  |  9 ++-
 include/trace/events/alarmtimer.h | 92 +++
 kernel/time/alarmtimer.c  | 16 +++-
 tools/testing/selftests/timers/skew_consistency.c |  2 +-
 9 files changed, 163 insertions(+), 5 deletions(-)
 create mode 100644 include/trace/events/alarmtimer.h

-- 
2.7.4



[PATCH] Tools: hv: Add binaries to .gitignore

2016-11-18 Thread Alex Fluter
The three hv tools should be ignored by git
so that they do not appear as untracked files.

Signed-off-by: Alex Fluter 
---
 tools/hv/.gitignore | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 tools/hv/.gitignore

diff --git a/tools/hv/.gitignore b/tools/hv/.gitignore
new file mode 100644
index 000..88b05e3
--- /dev/null
+++ b/tools/hv/.gitignore
@@ -0,0 +1,3 @@
+hv_fcopy_daemon
+hv_kvp_daemon
+hv_vss_daemon
-- 
2.7.4



[PATCH] Tools: hv: Add binaries to .gitignore

2016-11-18 Thread Alex Fluter
The three hv tools should be ignored by git
so that they do not appear as untracked files.

Signed-off-by: Alex Fluter 
---
 tools/hv/.gitignore | 3 +++
 1 file changed, 3 insertions(+)
 create mode 100644 tools/hv/.gitignore

diff --git a/tools/hv/.gitignore b/tools/hv/.gitignore
new file mode 100644
index 000..88b05e3
--- /dev/null
+++ b/tools/hv/.gitignore
@@ -0,0 +1,3 @@
+hv_fcopy_daemon
+hv_kvp_daemon
+hv_vss_daemon
-- 
2.7.4



[PATCH] spi: davinci: Allow device tree devices to use DMA

2016-11-18 Thread David Lechner
This makes SPI devices specified in a device tree use DMA when the master
controller has DMA configured.

Since device tree is supposed to only describe the hardware, adding a
configuration option to device tree to enable DMA per-device would not be
acceptable. So, this is the best we can do for now to get SPI devices
working with DMA when using device tree.

Unfortunately, this excludes the possibility of using one SPI device with
DMA and one without on the same master.

I have tested this on LEGO MINDSTORMS EV3 using the NOR flash. Reading the
flash memory would fail with -EIO when DMA is not enabled for the device.

Signed-off-by: David Lechner 
---
 drivers/spi/spi-davinci.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
index d36c11b..c6cf73a 100644
--- a/drivers/spi/spi-davinci.c
+++ b/drivers/spi/spi-davinci.c
@@ -388,6 +388,7 @@ static int davinci_spi_setup_transfer(struct spi_device 
*spi,
 static int davinci_spi_of_setup(struct spi_device *spi)
 {
struct davinci_spi_config *spicfg = spi->controller_data;
+   struct davinci_spi *dspi = spi_master_get_devdata(spi->master);
struct device_node *np = spi->dev.of_node;
u32 prop;
 
@@ -400,6 +401,9 @@ static int davinci_spi_of_setup(struct spi_device *spi)
if (!of_property_read_u32(np, "ti,spi-wdelay", ))
spicfg->wdelay = (u8)prop;
spi->controller_data = spicfg;
+   /* Use DMA for device if master supports it */
+   if (dspi->dma_rx)
+   spicfg->io_type = SPI_IO_TYPE_DMA;
}
 
return 0;
-- 
2.7.4



[PATCH] spi: davinci: Allow device tree devices to use DMA

2016-11-18 Thread David Lechner
This makes SPI devices specified in a device tree use DMA when the master
controller has DMA configured.

Since device tree is supposed to only describe the hardware, adding a
configuration option to device tree to enable DMA per-device would not be
acceptable. So, this is the best we can do for now to get SPI devices
working with DMA when using device tree.

Unfortunately, this excludes the possibility of using one SPI device with
DMA and one without on the same master.

I have tested this on LEGO MINDSTORMS EV3 using the NOR flash. Reading the
flash memory would fail with -EIO when DMA is not enabled for the device.

Signed-off-by: David Lechner 
---
 drivers/spi/spi-davinci.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/drivers/spi/spi-davinci.c b/drivers/spi/spi-davinci.c
index d36c11b..c6cf73a 100644
--- a/drivers/spi/spi-davinci.c
+++ b/drivers/spi/spi-davinci.c
@@ -388,6 +388,7 @@ static int davinci_spi_setup_transfer(struct spi_device 
*spi,
 static int davinci_spi_of_setup(struct spi_device *spi)
 {
struct davinci_spi_config *spicfg = spi->controller_data;
+   struct davinci_spi *dspi = spi_master_get_devdata(spi->master);
struct device_node *np = spi->dev.of_node;
u32 prop;
 
@@ -400,6 +401,9 @@ static int davinci_spi_of_setup(struct spi_device *spi)
if (!of_property_read_u32(np, "ti,spi-wdelay", ))
spicfg->wdelay = (u8)prop;
spi->controller_data = spicfg;
+   /* Use DMA for device if master supports it */
+   if (dspi->dma_rx)
+   spicfg->io_type = SPI_IO_TYPE_DMA;
}
 
return 0;
-- 
2.7.4



[kvm-unit-tests PATCH v9 1/3] arm: Add PMU test

2016-11-18 Thread Wei Huang
From: Christopher Covington 

Beginning with a simple sanity check of the control register, add
a unit test for the ARM Performance Monitors Unit (PMU).

Signed-off-by: Christopher Covington 
Signed-off-by: Wei Huang 
Reviewed-by: Andrew Jones 
---
 arm/Makefile.common |  3 ++-
 arm/pmu.c   | 74 +
 arm/unittests.cfg   |  5 
 3 files changed, 81 insertions(+), 1 deletion(-)
 create mode 100644 arm/pmu.c

diff --git a/arm/Makefile.common b/arm/Makefile.common
index ccb554d..f98f422 100644
--- a/arm/Makefile.common
+++ b/arm/Makefile.common
@@ -11,7 +11,8 @@ endif
 
 tests-common = \
$(TEST_DIR)/selftest.flat \
-   $(TEST_DIR)/spinlock-test.flat
+   $(TEST_DIR)/spinlock-test.flat \
+   $(TEST_DIR)/pmu.flat
 
 all: test_cases
 
diff --git a/arm/pmu.c b/arm/pmu.c
new file mode 100644
index 000..9d9c53b
--- /dev/null
+++ b/arm/pmu.c
@@ -0,0 +1,74 @@
+/*
+ * Test the ARM Performance Monitors Unit (PMU).
+ *
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License version 2.1 and
+ * only version 2.1 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+ * for more details.
+ */
+#include "libcflat.h"
+#include "asm/barrier.h"
+
+#define PMU_PMCR_N_SHIFT   11
+#define PMU_PMCR_N_MASK0x1f
+#define PMU_PMCR_ID_SHIFT  16
+#define PMU_PMCR_ID_MASK   0xff
+#define PMU_PMCR_IMP_SHIFT 24
+#define PMU_PMCR_IMP_MASK  0xff
+
+#if defined(__arm__)
+static inline uint32_t pmcr_read(void)
+{
+   uint32_t ret;
+
+   asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
+   return ret;
+}
+#elif defined(__aarch64__)
+static inline uint32_t pmcr_read(void)
+{
+   uint32_t ret;
+
+   asm volatile("mrs %0, pmcr_el0" : "=r" (ret));
+   return ret;
+}
+#endif
+
+/*
+ * As a simple sanity check on the PMCR_EL0, ensure the implementer field isn't
+ * null. Also print out a couple other interesting fields for diagnostic
+ * purposes. For example, as of fall 2016, QEMU TCG mode doesn't implement
+ * event counters and therefore reports zero event counters, but hopefully
+ * support for at least the instructions event will be added in the future and
+ * the reported number of event counters will become nonzero.
+ */
+static bool check_pmcr(void)
+{
+   uint32_t pmcr;
+
+   pmcr = pmcr_read();
+
+   printf("PMU implementer: %c\n",
+  (pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK);
+   printf("Identification code: 0x%x\n",
+  (pmcr >> PMU_PMCR_ID_SHIFT) & PMU_PMCR_ID_MASK);
+   printf("Event counters:  %d\n",
+  (pmcr >> PMU_PMCR_N_SHIFT) & PMU_PMCR_N_MASK);
+
+   return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
+}
+
+int main(void)
+{
+   report_prefix_push("pmu");
+
+   report("Control register", check_pmcr());
+
+   return report_summary();
+}
diff --git a/arm/unittests.cfg b/arm/unittests.cfg
index 3f6fa45..7645180 100644
--- a/arm/unittests.cfg
+++ b/arm/unittests.cfg
@@ -54,3 +54,8 @@ file = selftest.flat
 smp = $MAX_SMP
 extra_params = -append 'smp'
 groups = selftest
+
+# Test PMU support
+[pmu]
+file = pmu.flat
+groups = pmu
-- 
1.8.3.1



[kvm-unit-tests PATCH v9 1/3] arm: Add PMU test

2016-11-18 Thread Wei Huang
From: Christopher Covington 

Beginning with a simple sanity check of the control register, add
a unit test for the ARM Performance Monitors Unit (PMU).

Signed-off-by: Christopher Covington 
Signed-off-by: Wei Huang 
Reviewed-by: Andrew Jones 
---
 arm/Makefile.common |  3 ++-
 arm/pmu.c   | 74 +
 arm/unittests.cfg   |  5 
 3 files changed, 81 insertions(+), 1 deletion(-)
 create mode 100644 arm/pmu.c

diff --git a/arm/Makefile.common b/arm/Makefile.common
index ccb554d..f98f422 100644
--- a/arm/Makefile.common
+++ b/arm/Makefile.common
@@ -11,7 +11,8 @@ endif
 
 tests-common = \
$(TEST_DIR)/selftest.flat \
-   $(TEST_DIR)/spinlock-test.flat
+   $(TEST_DIR)/spinlock-test.flat \
+   $(TEST_DIR)/pmu.flat
 
 all: test_cases
 
diff --git a/arm/pmu.c b/arm/pmu.c
new file mode 100644
index 000..9d9c53b
--- /dev/null
+++ b/arm/pmu.c
@@ -0,0 +1,74 @@
+/*
+ * Test the ARM Performance Monitors Unit (PMU).
+ *
+ * Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License version 2.1 and
+ * only version 2.1 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+ * for more details.
+ */
+#include "libcflat.h"
+#include "asm/barrier.h"
+
+#define PMU_PMCR_N_SHIFT   11
+#define PMU_PMCR_N_MASK0x1f
+#define PMU_PMCR_ID_SHIFT  16
+#define PMU_PMCR_ID_MASK   0xff
+#define PMU_PMCR_IMP_SHIFT 24
+#define PMU_PMCR_IMP_MASK  0xff
+
+#if defined(__arm__)
+static inline uint32_t pmcr_read(void)
+{
+   uint32_t ret;
+
+   asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
+   return ret;
+}
+#elif defined(__aarch64__)
+static inline uint32_t pmcr_read(void)
+{
+   uint32_t ret;
+
+   asm volatile("mrs %0, pmcr_el0" : "=r" (ret));
+   return ret;
+}
+#endif
+
+/*
+ * As a simple sanity check on the PMCR_EL0, ensure the implementer field isn't
+ * null. Also print out a couple other interesting fields for diagnostic
+ * purposes. For example, as of fall 2016, QEMU TCG mode doesn't implement
+ * event counters and therefore reports zero event counters, but hopefully
+ * support for at least the instructions event will be added in the future and
+ * the reported number of event counters will become nonzero.
+ */
+static bool check_pmcr(void)
+{
+   uint32_t pmcr;
+
+   pmcr = pmcr_read();
+
+   printf("PMU implementer: %c\n",
+  (pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK);
+   printf("Identification code: 0x%x\n",
+  (pmcr >> PMU_PMCR_ID_SHIFT) & PMU_PMCR_ID_MASK);
+   printf("Event counters:  %d\n",
+  (pmcr >> PMU_PMCR_N_SHIFT) & PMU_PMCR_N_MASK);
+
+   return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
+}
+
+int main(void)
+{
+   report_prefix_push("pmu");
+
+   report("Control register", check_pmcr());
+
+   return report_summary();
+}
diff --git a/arm/unittests.cfg b/arm/unittests.cfg
index 3f6fa45..7645180 100644
--- a/arm/unittests.cfg
+++ b/arm/unittests.cfg
@@ -54,3 +54,8 @@ file = selftest.flat
 smp = $MAX_SMP
 extra_params = -append 'smp'
 groups = selftest
+
+# Test PMU support
+[pmu]
+file = pmu.flat
+groups = pmu
-- 
1.8.3.1



[kvm-unit-tests PATCH v9 0/3] ARM PMU tests

2016-11-18 Thread Wei Huang
Changes from v8:
* Probe PMU version based on ID_DFR0
* pmccntr_read() now returns 64bit and can handle both 32bit and 64bit
  PMCCNTR based on PMU version.
* Add pmccntr_write() support
* Use a common printf format PRId64 to support 64bit variable smoothly in
  test functions
* Add barriers to several PMU write functions
* Verified on different execution modes

Note:
1) Current KVM code has bugs in handling PMCCFILTR write. A fix (see
below) is required for this unit testing code to work correctly under
KVM mode.
https://lists.cs.columbia.edu/pipermail/kvmarm/2016-November/022134.html.

Thanks,
-Wei

Wei Huang (3):
  arm: Add PMU test
  arm: pmu: Check cycle count increases
  arm: pmu: Add CPI checking

 arm/Makefile.common |   3 +-
 arm/pmu.c   | 339 
 arm/unittests.cfg   |  19 +++
 3 files changed, 360 insertions(+), 1 deletion(-)
 create mode 100644 arm/pmu.c

-- 
1.8.3.1



[kvm-unit-tests PATCH v9 2/3] arm: pmu: Check cycle count increases

2016-11-18 Thread Wei Huang
From: Christopher Covington 

Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
even for the smallest delta of two subsequent reads.

Signed-off-by: Christopher Covington 
Signed-off-by: Wei Huang 
---
 arm/pmu.c | 156 ++
 1 file changed, 156 insertions(+)

diff --git a/arm/pmu.c b/arm/pmu.c
index 9d9c53b..fa87de4 100644
--- a/arm/pmu.c
+++ b/arm/pmu.c
@@ -15,6 +15,9 @@
 #include "libcflat.h"
 #include "asm/barrier.h"
 
+#define PMU_PMCR_E (1 << 0)
+#define PMU_PMCR_C (1 << 2)
+#define PMU_PMCR_LC(1 << 6)
 #define PMU_PMCR_N_SHIFT   11
 #define PMU_PMCR_N_MASK0x1f
 #define PMU_PMCR_ID_SHIFT  16
@@ -22,6 +25,14 @@
 #define PMU_PMCR_IMP_SHIFT 24
 #define PMU_PMCR_IMP_MASK  0xff
 
+#define ID_DFR0_PERFMON_SHIFT 24
+#define ID_DFR0_PERFMON_MASK  0xf
+
+#define PMU_CYCLE_IDX 31
+
+#define NR_SAMPLES 10
+
+static unsigned int pmu_version;
 #if defined(__arm__)
 static inline uint32_t pmcr_read(void)
 {
@@ -30,6 +41,69 @@ static inline uint32_t pmcr_read(void)
asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
return ret;
 }
+
+static inline void pmcr_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (value));
+   isb();
+}
+
+static inline void pmselr_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (value));
+   isb();
+}
+
+static inline void pmxevtyper_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (value));
+}
+
+static inline uint64_t pmccntr_read(void)
+{
+   uint32_t lo, hi = 0;
+
+   if (pmu_version == 0x3)
+   asm volatile("mrrc p15, 0, %0, %1, c9" : "=r" (lo), "=r" (hi));
+   else
+   asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (lo));
+
+   return ((uint64_t)hi << 32) | lo;
+}
+
+static inline void pmccntr_write(uint64_t value)
+{
+   uint32_t lo, hi;
+
+   lo = value & 0x;
+   hi = (value >> 32) & 0x;
+
+   if (pmu_version == 0x3)
+   asm volatile("mcrr p15, 0, %0, %1, c9" : : "r" (lo), "r" (hi));
+   else
+   asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (lo));
+}
+
+static inline void pmcntenset_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (value));
+}
+
+/* PMCCFILTR is an obsolete name for PMXEVTYPER31 in ARMv7 */
+static inline void pmccfiltr_write(uint32_t value)
+{
+   pmselr_write(PMU_CYCLE_IDX);
+   pmxevtyper_write(value);
+   isb();
+}
+
+static inline uint32_t id_dfr0_read(void)
+{
+   uint32_t val;
+
+   asm volatile("mrc p15, 0, %0, c0, c1, 2" : "=r" (val));
+   return val;
+}
 #elif defined(__aarch64__)
 static inline uint32_t pmcr_read(void)
 {
@@ -38,6 +112,44 @@ static inline uint32_t pmcr_read(void)
asm volatile("mrs %0, pmcr_el0" : "=r" (ret));
return ret;
 }
+
+static inline void pmcr_write(uint32_t value)
+{
+   asm volatile("msr pmcr_el0, %0" : : "r" (value));
+   isb();
+}
+
+static inline uint64_t pmccntr_read(void)
+{
+   uint64_t cycles;
+
+   asm volatile("mrs %0, pmccntr_el0" : "=r" (cycles));
+   return cycles;
+}
+
+static inline void pmccntr_write(uint64_t value)
+{
+   asm volatile("msr pmccntr_el0, %0" : : "r" (value));
+}
+
+static inline void pmcntenset_write(uint32_t value)
+{
+   asm volatile("msr pmcntenset_el0, %0" : : "r" (value));
+}
+
+static inline void pmccfiltr_write(uint32_t value)
+{
+   asm volatile("msr pmccfiltr_el0, %0" : : "r" (value));
+   isb();
+}
+
+static inline uint32_t id_dfr0_read(void)
+{
+   uint32_t id;
+
+   asm volatile("mrs %0, id_dfr0_el1" : "=r" (id));
+   return id;
+}
 #endif
 
 /*
@@ -64,11 +176,55 @@ static bool check_pmcr(void)
return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
 }
 
+/*
+ * Ensure that the cycle counter progresses between back-to-back reads.
+ */
+static bool check_cycles_increase(void)
+{
+   bool success = true;
+
+   pmccntr_write(0);
+   pmcr_write(pmcr_read() | PMU_PMCR_LC | PMU_PMCR_C | PMU_PMCR_E);
+
+   for (int i = 0; i < NR_SAMPLES; i++) {
+   uint64_t a, b;
+
+   a = pmccntr_read();
+   b = pmccntr_read();
+
+   if (a >= b) {
+   printf("Read %"PRId64" then %"PRId64".\n", a, b);
+   success = false;
+   break;
+   }
+   }
+
+   pmcr_write(pmcr_read() & ~PMU_PMCR_E);
+
+   return success;
+}
+
+void pmu_init(void)
+{
+   uint32_t dfr0;
+
+   /* probe pmu version */
+   dfr0 = id_dfr0_read();
+   pmu_version = (dfr0 >> ID_DFR0_PERFMON_SHIFT) & ID_DFR0_PERFMON_MASK;
+   printf("PMU version: %d\n", pmu_version);
+   
+   /* init for PMU event access, right now only care about cycle 

[kvm-unit-tests PATCH v9 3/3] arm: pmu: Add CPI checking

2016-11-18 Thread Wei Huang
From: Christopher Covington 

Calculate the numbers of cycles per instruction (CPI) implied by ARM
PMU cycle counter values. The code includes a strict checking facility
intended for the -icount option in TCG mode in the configuration file.

Signed-off-by: Christopher Covington 
Signed-off-by: Wei Huang 
---
 arm/pmu.c | 111 +-
 arm/unittests.cfg |  14 +++
 2 files changed, 124 insertions(+), 1 deletion(-)

diff --git a/arm/pmu.c b/arm/pmu.c
index fa87de4..b36c4fb 100644
--- a/arm/pmu.c
+++ b/arm/pmu.c
@@ -104,6 +104,25 @@ static inline uint32_t id_dfr0_read(void)
asm volatile("mrc p15, 0, %0, c0, c1, 2" : "=r" (val));
return val;
 }
+
+/*
+ * Extra instructions inserted by the compiler would be difficult to compensate
+ * for, so hand assemble everything between, and including, the PMCR accesses
+ * to start and stop counting.
+ */
+static inline void loop(int i, uint32_t pmcr)
+{
+   asm volatile(
+   "   mcr p15, 0, %[pmcr], c9, c12, 0\n"
+   "   isb\n"
+   "1: subs %[i], %[i], #1\n"
+   "   bgt 1b\n"
+   "   mcr p15, 0, %[z], c9, c12, 0\n"
+   "   isb\n"
+   : [i] "+r" (i)
+   : [pmcr] "r" (pmcr), [z] "r" (0)
+   : "cc");
+}
 #elif defined(__aarch64__)
 static inline uint32_t pmcr_read(void)
 {
@@ -150,6 +169,25 @@ static inline uint32_t id_dfr0_read(void)
asm volatile("mrs %0, id_dfr0_el1" : "=r" (id));
return id;
 }
+
+/*
+ * Extra instructions inserted by the compiler would be difficult to compensate
+ * for, so hand assemble everything between, and including, the PMCR accesses
+ * to start and stop counting.
+ */
+static inline void loop(int i, uint32_t pmcr)
+{
+   asm volatile(
+   "   msr pmcr_el0, %[pmcr]\n"
+   "   isb\n"
+   "1: subs %[i], %[i], #1\n"
+   "   b.gt 1b\n"
+   "   msr pmcr_el0, xzr\n"
+   "   isb\n"
+   : [i] "+r" (i)
+   : [pmcr] "r" (pmcr)
+   : "cc");
+}
 #endif
 
 /*
@@ -204,6 +242,71 @@ static bool check_cycles_increase(void)
return success;
 }
 
+/*
+ * Execute a known number of guest instructions. Only odd instruction counts
+ * greater than or equal to 3 are supported by the in-line assembly code. The
+ * control register (PMCR_EL0) is initialized with the provided value (allowing
+ * for example for the cycle counter or event counters to be reset). At the end
+ * of the exact instruction loop, zero is written to PMCR_EL0 to disable
+ * counting, allowing the cycle counter or event counters to be read at the
+ * leisure of the calling code.
+ */
+static void measure_instrs(int num, uint32_t pmcr)
+{
+   int i = (num - 1) / 2;
+
+   assert(num >= 3 && ((num - 1) % 2 == 0));
+   loop(i, pmcr);
+}
+
+/*
+ * Measure cycle counts for various known instruction counts. Ensure that the
+ * cycle counter progresses (similar to check_cycles_increase() but with more
+ * instructions and using reset and stop controls). If supplied a positive,
+ * nonzero CPI parameter, also strictly check that every measurement matches
+ * it. Strict CPI checking is used to test -icount mode.
+ */
+static bool check_cpi(int cpi)
+{
+   uint32_t pmcr = pmcr_read() | PMU_PMCR_LC | PMU_PMCR_C | PMU_PMCR_E;
+   
+   if (cpi > 0)
+   printf("Checking for CPI=%d.\n", cpi);
+   printf("instrs : cycles0 cycles1 ...\n");
+
+   for (unsigned int i = 3; i < 300; i += 32) {
+   uint64_t avg, sum = 0;
+
+   printf("%d :", i);
+   for (int j = 0; j < NR_SAMPLES; j++) {
+   uint64_t cycles;
+
+   pmccntr_write(0);
+   measure_instrs(i, pmcr);
+   cycles = pmccntr_read();
+   printf(" %"PRId64"", cycles);
+
+   /*
+* The cycles taken by the loop above should fit in
+* 32 bits easily. We check the upper 32 bits of the
+* cycle counter to make sure there is no surprise.
+*/
+   if (!cycles || (cpi > 0 && cycles != i * cpi) ||
+   (cycles & 0xffffffff00000000UL)) {
+   printf("\n");
+   return false;
+   }
+
+   sum += cycles;
+   }
+   avg = sum / NR_SAMPLES;
+   printf(" sum=%"PRId64" avg=%"PRId64" avg_ipc=%"PRId64" "
+  "avg_cpi=%"PRId64"\n", sum, avg, i / avg, avg / i);
+   }
+
+   return true;
+}
+
 void pmu_init(void)
 {
uint32_t dfr0;
@@ -218,13 +321,19 @@ void pmu_init(void)
pmccfiltr_write(0); /* count cycles in EL0, EL1, but not EL2 */
 }
 
-int main(void)
+int main(int argc, char *argv[])

[kvm-unit-tests PATCH v9 0/3] ARM PMU tests

2016-11-18 Thread Wei Huang
Changes from v8:
* Probe PMU version based on ID_DFR0
* pmccntr_read() now returns 64bit and can handle both 32bit and 64bit
  PMCCNTR based on PMU version.
* Add pmccntr_write() support
* Use a common printf format PRId64 to support 64bit variable smoothly in
  test functions
* Add barriers to several PMU write functions
* Verified on different execution modes

Note:
1) Current KVM code has bugs in handling PMCCFILTR write. A fix (see
below) is required for this unit testing code to work correctly under
KVM mode.
https://lists.cs.columbia.edu/pipermail/kvmarm/2016-November/022134.html.

Thanks,
-Wei

Wei Huang (3):
  arm: Add PMU test
  arm: pmu: Check cycle count increases
  arm: pmu: Add CPI checking

 arm/Makefile.common |   3 +-
 arm/pmu.c   | 339 
 arm/unittests.cfg   |  19 +++
 3 files changed, 360 insertions(+), 1 deletion(-)
 create mode 100644 arm/pmu.c

-- 
1.8.3.1



[kvm-unit-tests PATCH v9 2/3] arm: pmu: Check cycle count increases

2016-11-18 Thread Wei Huang
From: Christopher Covington 

Ensure that reads of the PMCCNTR_EL0 are monotonically increasing,
even for the smallest delta of two subsequent reads.

Signed-off-by: Christopher Covington 
Signed-off-by: Wei Huang 
---
 arm/pmu.c | 156 ++
 1 file changed, 156 insertions(+)

diff --git a/arm/pmu.c b/arm/pmu.c
index 9d9c53b..fa87de4 100644
--- a/arm/pmu.c
+++ b/arm/pmu.c
@@ -15,6 +15,9 @@
 #include "libcflat.h"
 #include "asm/barrier.h"
 
+#define PMU_PMCR_E (1 << 0)
+#define PMU_PMCR_C (1 << 2)
+#define PMU_PMCR_LC (1 << 6)
 #define PMU_PMCR_N_SHIFT   11
 #define PMU_PMCR_N_MASK    0x1f
 #define PMU_PMCR_ID_SHIFT  16
@@ -22,6 +25,14 @@
 #define PMU_PMCR_IMP_SHIFT 24
 #define PMU_PMCR_IMP_MASK  0xff
 
+#define ID_DFR0_PERFMON_SHIFT 24
+#define ID_DFR0_PERFMON_MASK  0xf
+
+#define PMU_CYCLE_IDX 31
+
+#define NR_SAMPLES 10
+
+static unsigned int pmu_version;
 #if defined(__arm__)
 static inline uint32_t pmcr_read(void)
 {
@@ -30,6 +41,69 @@ static inline uint32_t pmcr_read(void)
asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (ret));
return ret;
 }
+
+static inline void pmcr_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r" (value));
+   isb();
+}
+
+static inline void pmselr_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (value));
+   isb();
+}
+
+static inline void pmxevtyper_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (value));
+}
+
+static inline uint64_t pmccntr_read(void)
+{
+   uint32_t lo, hi = 0;
+
+   if (pmu_version == 0x3)
+   asm volatile("mrrc p15, 0, %0, %1, c9" : "=r" (lo), "=r" (hi));
+   else
+   asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (lo));
+
+   return ((uint64_t)hi << 32) | lo;
+}
+
+static inline void pmccntr_write(uint64_t value)
+{
+   uint32_t lo, hi;
+
+   lo = value & 0xffffffff;
+   hi = (value >> 32) & 0xffffffff;
+
+   if (pmu_version == 0x3)
+   asm volatile("mcrr p15, 0, %0, %1, c9" : : "r" (lo), "r" (hi));
+   else
+   asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (lo));
+}
+
+static inline void pmcntenset_write(uint32_t value)
+{
+   asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (value));
+}
+
+/* PMCCFILTR is an obsolete name for PMXEVTYPER31 in ARMv7 */
+static inline void pmccfiltr_write(uint32_t value)
+{
+   pmselr_write(PMU_CYCLE_IDX);
+   pmxevtyper_write(value);
+   isb();
+}
+
+static inline uint32_t id_dfr0_read(void)
+{
+   uint32_t val;
+
+   asm volatile("mrc p15, 0, %0, c0, c1, 2" : "=r" (val));
+   return val;
+}
 #elif defined(__aarch64__)
 static inline uint32_t pmcr_read(void)
 {
@@ -38,6 +112,44 @@ static inline uint32_t pmcr_read(void)
asm volatile("mrs %0, pmcr_el0" : "=r" (ret));
return ret;
 }
+
+static inline void pmcr_write(uint32_t value)
+{
+   asm volatile("msr pmcr_el0, %0" : : "r" (value));
+   isb();
+}
+
+static inline uint64_t pmccntr_read(void)
+{
+   uint64_t cycles;
+
+   asm volatile("mrs %0, pmccntr_el0" : "=r" (cycles));
+   return cycles;
+}
+
+static inline void pmccntr_write(uint64_t value)
+{
+   asm volatile("msr pmccntr_el0, %0" : : "r" (value));
+}
+
+static inline void pmcntenset_write(uint32_t value)
+{
+   asm volatile("msr pmcntenset_el0, %0" : : "r" (value));
+}
+
+static inline void pmccfiltr_write(uint32_t value)
+{
+   asm volatile("msr pmccfiltr_el0, %0" : : "r" (value));
+   isb();
+}
+
+static inline uint32_t id_dfr0_read(void)
+{
+   uint32_t id;
+
+   asm volatile("mrs %0, id_dfr0_el1" : "=r" (id));
+   return id;
+}
 #endif
 
 /*
@@ -64,11 +176,55 @@ static bool check_pmcr(void)
return ((pmcr >> PMU_PMCR_IMP_SHIFT) & PMU_PMCR_IMP_MASK) != 0;
 }
 
+/*
+ * Ensure that the cycle counter progresses between back-to-back reads.
+ */
+static bool check_cycles_increase(void)
+{
+   bool success = true;
+
+   pmccntr_write(0);
+   pmcr_write(pmcr_read() | PMU_PMCR_LC | PMU_PMCR_C | PMU_PMCR_E);
+
+   for (int i = 0; i < NR_SAMPLES; i++) {
+   uint64_t a, b;
+
+   a = pmccntr_read();
+   b = pmccntr_read();
+
+   if (a >= b) {
+   printf("Read %"PRId64" then %"PRId64".\n", a, b);
+   success = false;
+   break;
+   }
+   }
+
+   pmcr_write(pmcr_read() & ~PMU_PMCR_E);
+
+   return success;
+}
+
+void pmu_init(void)
+{
+   uint32_t dfr0;
+
+   /* probe pmu version */
+   dfr0 = id_dfr0_read();
+   pmu_version = (dfr0 >> ID_DFR0_PERFMON_SHIFT) & ID_DFR0_PERFMON_MASK;
+   printf("PMU version: %d\n", pmu_version);
+   
+   /* init for PMU event access, right now only care about cycle count */
+   pmcntenset_write(1 << PMU_CYCLE_IDX);
+   

[kvm-unit-tests PATCH v9 3/3] arm: pmu: Add CPI checking

2016-11-18 Thread Wei Huang
From: Christopher Covington 

Calculate the numbers of cycles per instruction (CPI) implied by ARM
PMU cycle counter values. The code includes a strict checking facility
intended for the -icount option in TCG mode in the configuration file.

Signed-off-by: Christopher Covington 
Signed-off-by: Wei Huang 
---
 arm/pmu.c | 111 +-
 arm/unittests.cfg |  14 +++
 2 files changed, 124 insertions(+), 1 deletion(-)

diff --git a/arm/pmu.c b/arm/pmu.c
index fa87de4..b36c4fb 100644
--- a/arm/pmu.c
+++ b/arm/pmu.c
@@ -104,6 +104,25 @@ static inline uint32_t id_dfr0_read(void)
asm volatile("mrc p15, 0, %0, c0, c1, 2" : "=r" (val));
return val;
 }
+
+/*
+ * Extra instructions inserted by the compiler would be difficult to compensate
+ * for, so hand assemble everything between, and including, the PMCR accesses
+ * to start and stop counting.
+ */
+static inline void loop(int i, uint32_t pmcr)
+{
+   asm volatile(
+   "   mcr p15, 0, %[pmcr], c9, c12, 0\n"
+   "   isb\n"
+   "1: subs %[i], %[i], #1\n"
+   "   bgt 1b\n"
+   "   mcr p15, 0, %[z], c9, c12, 0\n"
+   "   isb\n"
+   : [i] "+r" (i)
+   : [pmcr] "r" (pmcr), [z] "r" (0)
+   : "cc");
+}
 #elif defined(__aarch64__)
 static inline uint32_t pmcr_read(void)
 {
@@ -150,6 +169,25 @@ static inline uint32_t id_dfr0_read(void)
asm volatile("mrs %0, id_dfr0_el1" : "=r" (id));
return id;
 }
+
+/*
+ * Extra instructions inserted by the compiler would be difficult to compensate
+ * for, so hand assemble everything between, and including, the PMCR accesses
+ * to start and stop counting.
+ */
+static inline void loop(int i, uint32_t pmcr)
+{
+   asm volatile(
+   "   msr pmcr_el0, %[pmcr]\n"
+   "   isb\n"
+   "1: subs %[i], %[i], #1\n"
+   "   b.gt 1b\n"
+   "   msr pmcr_el0, xzr\n"
+   "   isb\n"
+   : [i] "+r" (i)
+   : [pmcr] "r" (pmcr)
+   : "cc");
+}
 #endif
 
 /*
@@ -204,6 +242,71 @@ static bool check_cycles_increase(void)
return success;
 }
 
+/*
+ * Execute a known number of guest instructions. Only odd instruction counts
+ * greater than or equal to 3 are supported by the in-line assembly code. The
+ * control register (PMCR_EL0) is initialized with the provided value (allowing
+ * for example for the cycle counter or event counters to be reset). At the end
+ * of the exact instruction loop, zero is written to PMCR_EL0 to disable
+ * counting, allowing the cycle counter or event counters to be read at the
+ * leisure of the calling code.
+ */
+static void measure_instrs(int num, uint32_t pmcr)
+{
+   int i = (num - 1) / 2;
+
+   assert(num >= 3 && ((num - 1) % 2 == 0));
+   loop(i, pmcr);
+}
+
+/*
+ * Measure cycle counts for various known instruction counts. Ensure that the
+ * cycle counter progresses (similar to check_cycles_increase() but with more
+ * instructions and using reset and stop controls). If supplied a positive,
+ * nonzero CPI parameter, also strictly check that every measurement matches
+ * it. Strict CPI checking is used to test -icount mode.
+ */
+static bool check_cpi(int cpi)
+{
+   uint32_t pmcr = pmcr_read() | PMU_PMCR_LC | PMU_PMCR_C | PMU_PMCR_E;
+   
+   if (cpi > 0)
+   printf("Checking for CPI=%d.\n", cpi);
+   printf("instrs : cycles0 cycles1 ...\n");
+
+   for (unsigned int i = 3; i < 300; i += 32) {
+   uint64_t avg, sum = 0;
+
+   printf("%d :", i);
+   for (int j = 0; j < NR_SAMPLES; j++) {
+   uint64_t cycles;
+
+   pmccntr_write(0);
+   measure_instrs(i, pmcr);
+   cycles = pmccntr_read();
+   printf(" %"PRId64"", cycles);
+
+   /*
+* The cycles taken by the loop above should fit in
+* 32 bits easily. We check the upper 32 bits of the
+* cycle counter to make sure there is no surprise.
+*/
+   if (!cycles || (cpi > 0 && cycles != i * cpi) ||
+   (cycles & 0xffffffff00000000UL)) {
+   printf("\n");
+   return false;
+   }
+
+   sum += cycles;
+   }
+   avg = sum / NR_SAMPLES;
+   printf(" sum=%"PRId64" avg=%"PRId64" avg_ipc=%"PRId64" "
+  "avg_cpi=%"PRId64"\n", sum, avg, i / avg, avg / i);
+   }
+
+   return true;
+}
+
 void pmu_init(void)
 {
uint32_t dfr0;
@@ -218,13 +321,19 @@ void pmu_init(void)
pmccfiltr_write(0); /* count cycles in EL0, EL1, but not EL2 */
 }
 
-int main(void)
+int main(int argc, char *argv[])
 {
+   int cpi = 0;
+
+   if (argc >= 1)
+ 

Re: [PATCH 3/3] thermal: rockchip: don't pass table structs by value

2016-11-18 Thread Caesar Wang

在 2016年11月19日 07:52, Brian Norris 写道:

This driver passes struct chip_tsadc_table by value throughout; this is
inefficient, and AFAICT, there is no reason for it. Let's pass pointers
instead.

Signed-off-by: Brian Norris 

Reviewed-by: Caesar Wang 
Tested-by: Caesar Wang 

Yup, that makes sense to improve efficiency.
Thanks for the fixes.

And tested on rk3399 evb board.


---
  drivers/thermal/rockchip_thermal.c | 80 +++---
  1 file changed, 40 insertions(+), 40 deletions(-)

diff --git a/drivers/thermal/rockchip_thermal.c 
b/drivers/thermal/rockchip_thermal.c
index 35554d146b9d..30fb95a0dff0 100644
--- a/drivers/thermal/rockchip_thermal.c
+++ b/drivers/thermal/rockchip_thermal.c
@@ -118,11 +118,11 @@ struct rockchip_tsadc_chip {
void (*control)(void __iomem *reg, bool on);
  
  	/* Per-sensor methods */

-   int (*get_temp)(struct chip_tsadc_table table,
+   int (*get_temp)(const struct chip_tsadc_table *table,
int chn, void __iomem *reg, int *temp);
-   void (*set_alarm_temp)(struct chip_tsadc_table table,
+   void (*set_alarm_temp)(const struct chip_tsadc_table *table,
   int chn, void __iomem *reg, int temp);
-   void (*set_tshut_temp)(struct chip_tsadc_table table,
+   void (*set_tshut_temp)(const struct chip_tsadc_table *table,
   int chn, void __iomem *reg, int temp);
void (*set_tshut_mode)(int chn, void __iomem *reg, enum tshut_mode m);
  
@@ -397,26 +397,26 @@ static const struct tsadc_table rk3399_code_table[] = {

{TSADCV3_DATA_MASK, 125000},
  };
  
-static u32 rk_tsadcv2_temp_to_code(struct chip_tsadc_table table,

+static u32 rk_tsadcv2_temp_to_code(const struct chip_tsadc_table *table,
   int temp)
  {
int high, low, mid;
u32 error = 0;
  
  	low = 0;

-   high = table.length - 1;
+   high = table->length - 1;
mid = (high + low) / 2;
  
  	/* Return mask code data when the temp is over table range */

-   if (temp < table.id[low].temp || temp > table.id[high].temp) {
-   error = table.data_mask;
+   if (temp < table->id[low].temp || temp > table->id[high].temp) {
+   error = table->data_mask;
goto exit;
}
  
  	while (low <= high) {

-   if (temp == table.id[mid].temp)
-   return table.id[mid].code;
-   else if (temp < table.id[mid].temp)
+   if (temp == table->id[mid].temp)
+   return table->id[mid].code;
+   else if (temp < table->id[mid].temp)
high = mid - 1;
else
low = mid + 1;
@@ -429,28 +429,28 @@ static u32 rk_tsadcv2_temp_to_code(struct 
chip_tsadc_table table,
return error;
  }
  
-static int rk_tsadcv2_code_to_temp(struct chip_tsadc_table table, u32 code,

-  int *temp)
+static int rk_tsadcv2_code_to_temp(const struct chip_tsadc_table *table,
+  u32 code, int *temp)
  {
unsigned int low = 1;
-   unsigned int high = table.length - 1;
+   unsigned int high = table->length - 1;
unsigned int mid = (low + high) / 2;
unsigned int num;
unsigned long denom;
  
-	WARN_ON(table.length < 2);

+   WARN_ON(table->length < 2);
  
-	switch (table.mode) {

+   switch (table->mode) {
case ADC_DECREMENT:
-   code &= table.data_mask;
-   if (code < table.id[high].code)
+   code &= table->data_mask;
+   if (code < table->id[high].code)
return -EAGAIN; /* Incorrect reading */
  
  		while (low <= high) {

-   if (code >= table.id[mid].code &&
-   code < table.id[mid - 1].code)
+   if (code >= table->id[mid].code &&
+   code < table->id[mid - 1].code)
break;
-   else if (code < table.id[mid].code)
+   else if (code < table->id[mid].code)
low = mid + 1;
else
high = mid - 1;
@@ -459,15 +459,15 @@ static int rk_tsadcv2_code_to_temp(struct 
chip_tsadc_table table, u32 code,
}
break;
case ADC_INCREMENT:
-   code &= table.data_mask;
-   if (code < table.id[low].code)
+   code &= table->data_mask;
+   if (code < table->id[low].code)
return -EAGAIN; /* Incorrect reading */
  
  		while (low <= high) {

-   if (code <= table.id[mid].code &&
-   code > table.id[mid - 1].code)
+   if (code <= 

Re: [PATCH 3/3] thermal: rockchip: don't pass table structs by value

2016-11-18 Thread Caesar Wang

在 2016年11月19日 07:52, Brian Norris 写道:

This driver passes struct chip_tsadc_table by value throughout; this is
inefficient, and AFAICT, there is no reason for it. Let's pass pointers
instead.

Signed-off-by: Brian Norris 

Reviewed-by: Caesar Wang 
Tested-by: Caesar Wang 

Yup, that makes sense to improve efficiency.
Thanks for the fixes.

And tested on rk3399 evb board.


---
  drivers/thermal/rockchip_thermal.c | 80 +++---
  1 file changed, 40 insertions(+), 40 deletions(-)

diff --git a/drivers/thermal/rockchip_thermal.c 
b/drivers/thermal/rockchip_thermal.c
index 35554d146b9d..30fb95a0dff0 100644
--- a/drivers/thermal/rockchip_thermal.c
+++ b/drivers/thermal/rockchip_thermal.c
@@ -118,11 +118,11 @@ struct rockchip_tsadc_chip {
void (*control)(void __iomem *reg, bool on);
  
  	/* Per-sensor methods */

-   int (*get_temp)(struct chip_tsadc_table table,
+   int (*get_temp)(const struct chip_tsadc_table *table,
int chn, void __iomem *reg, int *temp);
-   void (*set_alarm_temp)(struct chip_tsadc_table table,
+   void (*set_alarm_temp)(const struct chip_tsadc_table *table,
   int chn, void __iomem *reg, int temp);
-   void (*set_tshut_temp)(struct chip_tsadc_table table,
+   void (*set_tshut_temp)(const struct chip_tsadc_table *table,
   int chn, void __iomem *reg, int temp);
void (*set_tshut_mode)(int chn, void __iomem *reg, enum tshut_mode m);
  
@@ -397,26 +397,26 @@ static const struct tsadc_table rk3399_code_table[] = {

{TSADCV3_DATA_MASK, 125000},
  };
  
-static u32 rk_tsadcv2_temp_to_code(struct chip_tsadc_table table,

+static u32 rk_tsadcv2_temp_to_code(const struct chip_tsadc_table *table,
   int temp)
  {
int high, low, mid;
u32 error = 0;
  
  	low = 0;

-   high = table.length - 1;
+   high = table->length - 1;
mid = (high + low) / 2;
  
  	/* Return mask code data when the temp is over table range */

-   if (temp < table.id[low].temp || temp > table.id[high].temp) {
-   error = table.data_mask;
+   if (temp < table->id[low].temp || temp > table->id[high].temp) {
+   error = table->data_mask;
goto exit;
}
  
  	while (low <= high) {

-   if (temp == table.id[mid].temp)
-   return table.id[mid].code;
-   else if (temp < table.id[mid].temp)
+   if (temp == table->id[mid].temp)
+   return table->id[mid].code;
+   else if (temp < table->id[mid].temp)
high = mid - 1;
else
low = mid + 1;
@@ -429,28 +429,28 @@ static u32 rk_tsadcv2_temp_to_code(struct 
chip_tsadc_table table,
return error;
  }
  
-static int rk_tsadcv2_code_to_temp(struct chip_tsadc_table table, u32 code,

-  int *temp)
+static int rk_tsadcv2_code_to_temp(const struct chip_tsadc_table *table,
+  u32 code, int *temp)
  {
unsigned int low = 1;
-   unsigned int high = table.length - 1;
+   unsigned int high = table->length - 1;
unsigned int mid = (low + high) / 2;
unsigned int num;
unsigned long denom;
  
-	WARN_ON(table.length < 2);

+   WARN_ON(table->length < 2);
  
-	switch (table.mode) {

+   switch (table->mode) {
case ADC_DECREMENT:
-   code &= table.data_mask;
-   if (code < table.id[high].code)
+   code &= table->data_mask;
+   if (code < table->id[high].code)
return -EAGAIN; /* Incorrect reading */
  
  		while (low <= high) {

-   if (code >= table.id[mid].code &&
-   code < table.id[mid - 1].code)
+   if (code >= table->id[mid].code &&
+   code < table->id[mid - 1].code)
break;
-   else if (code < table.id[mid].code)
+   else if (code < table->id[mid].code)
low = mid + 1;
else
high = mid - 1;
@@ -459,15 +459,15 @@ static int rk_tsadcv2_code_to_temp(struct 
chip_tsadc_table table, u32 code,
}
break;
case ADC_INCREMENT:
-   code &= table.data_mask;
-   if (code < table.id[low].code)
+   code &= table->data_mask;
+   if (code < table->id[low].code)
return -EAGAIN; /* Incorrect reading */
  
  		while (low <= high) {

-   if (code <= table.id[mid].code &&
-   code > table.id[mid - 1].code)
+   if (code <= table->id[mid].code &&
+   code > table->id[mid - 

Re: [PATCH 2/3] thermal: rockchip: improve conversion error messages

2016-11-18 Thread Caesar Wang

在 2016年11月19日 11:31, Caesar Wang 写道:

Brian,

在 2016年11月19日 07:52, Brian Norris 写道:

These error messages don't give much information about what went wrong.
It would be nice, for one, to see what invalid temperature was being
requested when conversion fails. It's also good to return an error when
we can't handle a conversion properly.

While we're at it, fix the grammar too.

Signed-off-by: Brian Norris 

Reviewed-by: Caesar w...@rock-chips.com

Thanks for the fixes.

-Caesar

---
Note: it'd probably be even nicer to know which sensor this was, but 
we've

kinda abstracted that one away by this point...

  drivers/thermal/rockchip_thermal.c | 7 +--
  1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/thermal/rockchip_thermal.c 
b/drivers/thermal/rockchip_thermal.c

index e227a9f0acf7..35554d146b9d 100644
--- a/drivers/thermal/rockchip_thermal.c
+++ b/drivers/thermal/rockchip_thermal.c
@@ -424,7 +424,8 @@ static u32 rk_tsadcv2_temp_to_code(struct 
chip_tsadc_table table,

  }
exit:
-pr_err("Invalid the conversion, error=%d\n", error);
+pr_err("%s: invalid temperature, temp=%d error=%d\n",
+__func__, temp, error);


I have done something similar in Rockchip's internal thermal driver, but forgot to
send it upstream. :(

exit:
pr_err("%s: Invalid conversion table: code=%d, temperature=%d\n",
   __func__, error, temp);


  return error;
  }
  @@ -475,7 +476,9 @@ static int rk_tsadcv2_code_to_temp(struct 
chip_tsadc_table table, u32 code,

  }
  break;
  default:
-pr_err("Invalid the conversion table\n");
+pr_err("%s: invalid conversion table, mode=%d\n",
+__func__, table.mode);
+return -EINVAL;
  }
/*







Re: [PATCH 2/3] thermal: rockchip: improve conversion error messages

2016-11-18 Thread Caesar Wang

Brian,

在 2016年11月19日 07:52, Brian Norris 写道:

These error messages don't give much information about what went wrong.
It would be nice, for one, to see what invalid temperature was being
requested when conversion fails. It's also good to return an error when
we can't handle a conversion properly.

While we're at it, fix the grammar too.

Signed-off-by: Brian Norris 
---
Note: it'd probably be even nicer to know which sensor this was, but we've
kinda abstracted that one away by this point...

  drivers/thermal/rockchip_thermal.c | 7 +--
  1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/thermal/rockchip_thermal.c 
b/drivers/thermal/rockchip_thermal.c
index e227a9f0acf7..35554d146b9d 100644
--- a/drivers/thermal/rockchip_thermal.c
+++ b/drivers/thermal/rockchip_thermal.c
@@ -424,7 +424,8 @@ static u32 rk_tsadcv2_temp_to_code(struct chip_tsadc_table 
table,
}
  
  exit:

-   pr_err("Invalid the conversion, error=%d\n", error);
+   pr_err("%s: invalid temperature, temp=%d error=%d\n",
+   __func__, temp, error);


I have done something similar in Rockchip's internal thermal driver, but forgot to
send it upstream. :(

exit:
pr_err("%s: Invalid conversion table: code=%d, temperature=%d\n",
   __func__, error, temp);


return error;
  }
  
@@ -475,7 +476,9 @@ static int rk_tsadcv2_code_to_temp(struct chip_tsadc_table table, u32 code,

}
break;
default:
-   pr_err("Invalid the conversion table\n");
+   pr_err("%s: invalid conversion table, mode=%d\n",
+   __func__, table.mode);
+   return -EINVAL;
}
  
  	/*





Re: [PATCH 2/3] thermal: rockchip: improve conversion error messages

2016-11-18 Thread Caesar Wang

在 2016年11月19日 11:31, Caesar Wang 写道:

Brian,

在 2016年11月19日 07:52, Brian Norris 写道:

These error messages don't give much information about what went wrong.
It would be nice, for one, to see what invalid temperature was being
requested when conversion fails. It's also good to return an error when
we can't handle a conversion properly.

While we're at it, fix the grammar too.

Signed-off-by: Brian Norris 

Reviewed-by: Caesar w...@rock-chips.com

Thanks for the fixes.

-Caesar

---
Note: it'd probably be even nicer to know which sensor this was, but 
we've

kinda abstracted that one away by this point...

  drivers/thermal/rockchip_thermal.c | 7 +--
  1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/thermal/rockchip_thermal.c 
b/drivers/thermal/rockchip_thermal.c

index e227a9f0acf7..35554d146b9d 100644
--- a/drivers/thermal/rockchip_thermal.c
+++ b/drivers/thermal/rockchip_thermal.c
@@ -424,7 +424,8 @@ static u32 rk_tsadcv2_temp_to_code(struct 
chip_tsadc_table table,

  }
exit:
-pr_err("Invalid the conversion, error=%d\n", error);
+pr_err("%s: invalid temperature, temp=%d error=%d\n",
+__func__, temp, error);


I have done something similar in Rockchip's internal thermal driver, but forgot to
send it upstream. :(

exit:
pr_err("%s: Invalid conversion table: code=%d, temperature=%d\n",
   __func__, error, temp);


  return error;
  }
  @@ -475,7 +476,9 @@ static int rk_tsadcv2_code_to_temp(struct 
chip_tsadc_table table, u32 code,

  }
  break;
  default:
-pr_err("Invalid the conversion table\n");
+pr_err("%s: invalid conversion table, mode=%d\n",
+__func__, table.mode);
+return -EINVAL;
  }
/*







Re: [PATCH 2/3] thermal: rockchip: improve conversion error messages

2016-11-18 Thread Caesar Wang

Brian,

在 2016年11月19日 07:52, Brian Norris 写道:

These error messages don't give much information about what went wrong.
It would be nice, for one, to see what invalid temperature was being
requested when conversion fails. It's also good to return an error when
we can't handle a conversion properly.

While we're at it, fix the grammar too.

Signed-off-by: Brian Norris 
---
Note: it'd probably be even nicer to know which sensor this was, but we've
kinda abstracted that one away by this point...

  drivers/thermal/rockchip_thermal.c | 7 +--
  1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/thermal/rockchip_thermal.c 
b/drivers/thermal/rockchip_thermal.c
index e227a9f0acf7..35554d146b9d 100644
--- a/drivers/thermal/rockchip_thermal.c
+++ b/drivers/thermal/rockchip_thermal.c
@@ -424,7 +424,8 @@ static u32 rk_tsadcv2_temp_to_code(struct chip_tsadc_table 
table,
}
  
  exit:

-   pr_err("Invalid the conversion, error=%d\n", error);
+   pr_err("%s: invalid temperature, temp=%d error=%d\n",
+   __func__, temp, error);


I have done something similar in Rockchip's internal thermal driver, but forgot to
send it upstream. :(

exit:
pr_err("%s: Invalid conversion table: code=%d, temperature=%d\n",
   __func__, error, temp);


return error;
  }
  
@@ -475,7 +476,9 @@ static int rk_tsadcv2_code_to_temp(struct chip_tsadc_table table, u32 code,

}
break;
default:
-   pr_err("Invalid the conversion table\n");
+   pr_err("%s: invalid conversion table, mode=%d\n",
+   __func__, table.mode);
+   return -EINVAL;
}
  
  	/*





Re: [RFC][PATCH 2/7] kref: Add kref_read()

2016-11-18 Thread Alexei Starovoitov
On Fri, Nov 18, 2016 at 05:33:35PM +, Reshetova, Elena wrote:
> On Thu, Nov 17, 2016 at 09:53:42AM +0100, Peter Zijlstra wrote:
> > On Wed, Nov 16, 2016 at 12:08:52PM -0800, Alexei Starovoitov wrote:
> > 
> > > I prefer to avoid 'fixing' things that are not broken.
> > > Note, prog->aux->refcnt already has explicit checks for overflow.
> > > locked_vm is used for resource accounting and not refcnt, so I don't 
> > > see issues there either.
> > 
> > The idea is to use something along the lines of:
> > 
> >   
> > http://lkml.kernel.org/r/20161115104608.GH3142@twins.programming.kicks
> > -ass.net
> > 
> > for all refcounts in the kernel.
> 
> >I understand the idea. I'm advocating to fix refcnts explicitly the way we 
> >did in bpf land instead of leaking memory, making processes unkillable and 
> >so on.
> >If refcnt can be bounds checked, it should be done that way, since it's a 
> >clean error path without odd side effects.
> >Therefore I'm against unconditionally applying refcount to all atomics.
> 
> > Also note that your:
> > 
> > struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) {
> > if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
> > atomic_sub(i, &prog->aux->refcnt);
> > return ERR_PTR(-EBUSY);
> > }
> > return prog;
> > }
> > 
> > is actually broken in the face of an actual overflow. Suppose @i is 
> > big enough to wrap refcnt into negative space.
> 
> >'i' is not controlled by user. It's a number of nic hw queues and 
> >BPF_MAX_REFCNT is 32k, so above is always safe.
> 
> If I understand your code right, you export the bpf_prog_add() and anyone is 
> free to use it 
> (some crazy buggy driver for example).
> Currently only drivers/net/ethernet/mellanox/mlx4/en_netdev.c uses it, but 
> you should
> consider any externally exposed interface as an attack vector from security 
> point of view. 

It's not realistic to harden all export_symbol apis.
Code review for in-tree modules is the only defense we have.
Remember out of tree perf counter issues... nothing perf core can do
about that. If it's out of tree, it's vendor's problem to fix it.



Re: [RFC][PATCH 2/7] kref: Add kref_read()

2016-11-18 Thread Alexei Starovoitov
On Fri, Nov 18, 2016 at 05:33:35PM +, Reshetova, Elena wrote:
> On Thu, Nov 17, 2016 at 09:53:42AM +0100, Peter Zijlstra wrote:
> > On Wed, Nov 16, 2016 at 12:08:52PM -0800, Alexei Starovoitov wrote:
> > 
> > > I prefer to avoid 'fixing' things that are not broken.
> > > Note, prog->aux->refcnt already has explicit checks for overflow.
> > > locked_vm is used for resource accounting and not refcnt, so I don't 
> > > see issues there either.
> > 
> > The idea is to use something along the lines of:
> > 
> >   
> > http://lkml.kernel.org/r/20161115104608.GH3142@twins.programming.kicks
> > -ass.net
> > 
> > for all refcounts in the kernel.
> 
> >I understand the idea. I'm advocating to fix refcnts explicitly the way we 
> >did in bpf land instead of leaking memory, making processes unkillable and 
> >so on.
> >If refcnt can be bounds checked, it should be done that way, since it's a 
> >clean error path without odd side effects.
> >Therefore I'm against unconditionally applying refcount to all atomics.
> 
> > Also note that your:
> > 
> > struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) {
> > if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) {
> > atomic_sub(i, &prog->aux->refcnt);
> > return ERR_PTR(-EBUSY);
> > }
> > return prog;
> > }
> > 
> > is actually broken in the face of an actual overflow. Suppose @i is 
> > big enough to wrap refcnt into negative space.
> 
> >'i' is not controlled by user. It's a number of nic hw queues and 
> >BPF_MAX_REFCNT is 32k, so above is always safe.
> 
> If I understand your code right, you export the bpf_prog_add() and anyone is 
> free to use it 
> (some crazy buggy driver for example).
> Currently only drivers/net/ethernet/mellanox/mlx4/en_netdev.c uses it, but 
> you should
> consider any externally exposed interface as an attack vector from security 
> point of view. 

It's not realistic to harden all export_symbol apis.
Code review for in-tree modules is the only defense we have.
Remember out of tree perf counter issues... nothing perf core can do
about that. If it's out of tree, it's vendor's problem to fix it.



Re: [PATCH 1/3] thermal: handle get_temp() errors properly

2016-11-18 Thread Eduardo Valentin
On Fri, Nov 18, 2016 at 03:52:55PM -0800, Brian Norris wrote:
> If using CONFIG_THERMAL_EMULATION, there's a corner case where we might
> get an error from the zone's get_temp() callback, but we'll ignore that
> and keep using its value. Let's just error out properly instead.
> 
> Signed-off-by: Brian Norris 
> ---
>  drivers/thermal/thermal_core.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index 911fd964c742..0fa497f10d25 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -494,6 +494,8 @@ int thermal_zone_get_temp(struct thermal_zone_device *tz, 
> int *temp)
>   mutex_lock(>lock);
>  
>   ret = tz->ops->get_temp(tz, temp);
> + if (ret)
> + goto exit_unlock;

Yeah, but the fall-through is intentional, if I am not mistaken.


>  
>   if (IS_ENABLED(CONFIG_THERMAL_EMULATION) && tz->emul_temperature) {

Even if the driver is not able to read real temperature, but emul temp
is configured, then there is still opportunity to report the emulated
temperature.

>   for (count = 0; count < tz->trips; count++) {
> @@ -514,6 +516,7 @@ int thermal_zone_get_temp(struct thermal_zone_device *tz, 
> int *temp)
>   *temp = tz->emul_temperature;

And if you check the lines at the bottom of the loop, you will see that,
in the failure case, we will still compare against the content of temp,
which might be problematic.

I would prefer we consider the patch I sent
some time ago:
https://patchwork.kernel.org/patch/7876381/

>   }
>   
> +exit_unlock:
>   mutex_unlock(>lock);
>  exit:
>   return ret;
> -- 
> 2.8.0.rc3.226.g39d4020
> 


Re: [PATCH 1/3] thermal: handle get_temp() errors properly

2016-11-18 Thread Eduardo Valentin
On Fri, Nov 18, 2016 at 03:52:55PM -0800, Brian Norris wrote:
> If using CONFIG_THERMAL_EMULATION, there's a corner case where we might
> get an error from the zone's get_temp() callback, but we'll ignore that
> and keep using its value. Let's just error out properly instead.
> 
> Signed-off-by: Brian Norris 
> ---
>  drivers/thermal/thermal_core.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
> index 911fd964c742..0fa497f10d25 100644
> --- a/drivers/thermal/thermal_core.c
> +++ b/drivers/thermal/thermal_core.c
> @@ -494,6 +494,8 @@ int thermal_zone_get_temp(struct thermal_zone_device *tz, 
> int *temp)
>   mutex_lock(>lock);
>  
>   ret = tz->ops->get_temp(tz, temp);
> + if (ret)
> + goto exit_unlock;

Yeah, but the fall-through is intentional, if I am not mistaken.


>  
>   if (IS_ENABLED(CONFIG_THERMAL_EMULATION) && tz->emul_temperature) {

Even if the driver is not able to read real temperature, but emul temp
is configured, then there is still opportunity to report the emulated
temperature.

>   for (count = 0; count < tz->trips; count++) {
> @@ -514,6 +516,7 @@ int thermal_zone_get_temp(struct thermal_zone_device *tz, 
> int *temp)
>   *temp = tz->emul_temperature;

And if you check the lines at the bottom of the loop, you will see that,
in the failure case, we will still compare against the content of temp,
which might be problematic.

I would prefer we consider the patch I sent
some time ago:
https://patchwork.kernel.org/patch/7876381/

>   }
>   
> +exit_unlock:
>   mutex_unlock(>lock);
>  exit:
>   return ret;
> -- 
> 2.8.0.rc3.226.g39d4020
> 


PROBLEM: dmesg spam: alloc_contig_range: [XX, YY) PFNs busy

2016-11-18 Thread Robin H. Johnson
(Replies CC to list and direct to me please)

Summary:

dmesg spammed with alloc_contig_range: [XX, YY) PFNs busy

Description:

I recently upgraded to 4.9-rc5 (previous kernel 4.5.0-rc6-00141-g6794402),
and since then my dmesg has been absolutely flooded with 'PFNs busy'
(>3GiB/day). My config did not change (all new options =n).

It's not consistent addresses, so the squelch of identical printk lines
hasn't helped.
Eg output:
[187487.621916] alloc_contig_range: [83f0a9, 83f0aa) PFNs busy
[187487.621924] alloc_contig_range: [83f0ce, 83f0cf) PFNs busy
[187487.621976] alloc_contig_range: [83f125, 83f126) PFNs busy
[187487.622013] alloc_contig_range: [83f127, 83f128) PFNs busy

Keywords:
-
mm, alloc_contig_range, CMA

Most recent kernel version which did not have the bug:
--
Known 4.5.0-rc6-00141-g6794402

ver_linux:
--
Linux bohr-int 4.9.0-rc5-00177-g81bcfe5 #12 SMP Wed Nov 16 13:16:32 PST
2016 x86_64 Intel(R) Core(TM) i7-2600K CPU @ 3.40GHz GenuineIntel
GNU/Linux

GNU C   5.3.0
GNU Make4.2.1
Binutils2.25.1
Util-linux  2.29
Mount   2.29
Quota-tools 4.03
Linux C Library 2.23
Dynamic linker (ldd)2.23
readlink: missing operand
Try 'readlink --help' for more information.
Procps  3.3.12
Net-tools   1.60
Kbd 2.0.3
Console-tools   2.0.3
Sh-utils8.25
Udev230
Modules Loaded  3w_sas 3w_ ablk_helper aesni_intel
aes_x86_64 af_packet ahci aic79xx amdgpu async_memcpy async_pq
async_raid6_recov async_tx async_xor ata_piix auth_rpcgss binfmt_misc
bluetooth bnep bnx2 bonding btbcm btintel btrfs btrtl btusb button cdrom
cn configs coretemp crc32c_intel crc32_pclmul crc_ccitt crc_itu_t
crct10dif_pclmul cryptd dca dm_bio_prison dm_bufio dm_cache dm_cache_smq
dm_crypt dm_delay dm_flakey dm_log dm_log_userspace dm_mirror dm_mod
dm_multipath dm_persistent_data dm_queue_length dm_raid dm_region_hash
dm_round_robin dm_service_time dm_snapshot dm_thin_pool dm_zero drm
drm_kms_helper dummy e1000 e1000e evdev ext2 fat fb_sys_fops
firewire_core firewire_ohci fjes fscache fuse ghash_clmulni_intel
glue_helper grace hangcheck_timer hid_a4tech hid_apple hid_belkin
hid_cherry hid_chicony hid_cypress hid_ezkey hid_generic hid_gyration
hid_logitech hid_logitech_dj hid_microsoft hid_monterey hid_petalynx
hid_pl hid_samsung hid_sony hid_sunplus hwmon_vid i2c_algo_bit i2c_i801
i2c_smbus igb input_leds intel_rapl ip6_udp_tunnel ipv6 irqbypass
iscsi_tcp iTCO_vendor_support iTCO_wdt ixgb ixgbe jfs kvm kvm_intel
libahci libata libcrc32c libiscsi libiscsi_tcp linear lockd lpc_ich lpfc
lrw macvlan mdio md_mod megaraid_mbox megaraid_mm megaraid_sas mii
mptbase mptfc mptsas mptscsih mptspi multipath nfs nfs_acl nfsd
nls_cp437 nls_iso8859_1 nvram ohci_hcd pata_jmicron pata_marvell
pata_platform pcspkr psmouse qla1280 qla2xxx r8169 radeon raid0 raid10
raid1 raid456 raid6_pq reiserfs rfkill sata_mv sata_sil24
scsi_transport_fc scsi_transport_iscsi scsi_transport_sas
scsi_transport_spi sd_mod sg sky2 snd snd_hda_codec
snd_hda_codec_generic snd_hda_codec_hdmi snd_hda_codec_realtek
snd_hda_core snd_hda_intel snd_hwdep snd_pcm snd_timer soundcore sr_mod
sunrpc syscopyarea sysfillrect sysimgblt tg3 ttm uas udp_tunnel
usb_storage vfat virtio virtio_net virtio_ring vxlan w83627ehf
x86_pkg_temp_thermal xfs xhci_hcd xhci_pci xor zlib_deflate



-- 
Robin Hugh Johnson
Gentoo Linux: Dev, Infra Lead, Foundation Trustee & Treasurer
E-Mail   : robb...@gentoo.org
GnuPG FP : 11ACBA4F 4778E3F6 E4EDF38E B27B944E 34884E85
GnuPG FP : 7D0B3CEB E9B85B1F 825BCECF EE05E6F6 A48F6136


signature.asc
Description: Digital signature


PROBLEM: dmesg spam: alloc_contig_range: [XX, YY) PFNs busy

2016-11-18 Thread Robin H. Johnson
(Replies CC to list and direct to me please)

Summary:

dmesg spammed with alloc_contig_range: [XX, YY) PFNs busy

Description:

I recently upgraded to 4.9-rc5 (previous kernel 4.5.0-rc6-00141-g6794402),
and since then my dmesg has been absolutely flooded with 'PFNs busy'
(>3GiB/day). My config did not change (all new options =n).

It's not consistent addresses, so the squelch of identical printk lines
hasn't helped.
Eg output:
[187487.621916] alloc_contig_range: [83f0a9, 83f0aa) PFNs busy
[187487.621924] alloc_contig_range: [83f0ce, 83f0cf) PFNs busy
[187487.621976] alloc_contig_range: [83f125, 83f126) PFNs busy
[187487.622013] alloc_contig_range: [83f127, 83f128) PFNs busy

Keywords:
-
mm, alloc_contig_range, CMA

Most recent kernel version which did not have the bug:
--
Known 4.5.0-rc6-00141-g6794402

ver_linux:
--
Linux bohr-int 4.9.0-rc5-00177-g81bcfe5 #12 SMP Wed Nov 16 13:16:32 PST
2016 x86_64 Intel(R) Core(TM) i7-2600K CPU @ 3.40GHz GenuineIntel
GNU/Linux

GNU C   5.3.0
GNU Make4.2.1
Binutils2.25.1
Util-linux  2.29
Mount   2.29
Quota-tools 4.03
Linux C Library 2.23
Dynamic linker (ldd)2.23
readlink: missing operand
Try 'readlink --help' for more information.
Procps  3.3.12
Net-tools   1.60
Kbd 2.0.3
Console-tools   2.0.3
Sh-utils8.25
Udev230
Modules Loaded  3w_sas 3w_ ablk_helper aesni_intel
aes_x86_64 af_packet ahci aic79xx amdgpu async_memcpy async_pq
async_raid6_recov async_tx async_xor ata_piix auth_rpcgss binfmt_misc
bluetooth bnep bnx2 bonding btbcm btintel btrfs btrtl btusb button cdrom
cn configs coretemp crc32c_intel crc32_pclmul crc_ccitt crc_itu_t
crct10dif_pclmul cryptd dca dm_bio_prison dm_bufio dm_cache dm_cache_smq
dm_crypt dm_delay dm_flakey dm_log dm_log_userspace dm_mirror dm_mod
dm_multipath dm_persistent_data dm_queue_length dm_raid dm_region_hash
dm_round_robin dm_service_time dm_snapshot dm_thin_pool dm_zero drm
drm_kms_helper dummy e1000 e1000e evdev ext2 fat fb_sys_fops
firewire_core firewire_ohci fjes fscache fuse ghash_clmulni_intel
glue_helper grace hangcheck_timer hid_a4tech hid_apple hid_belkin
hid_cherry hid_chicony hid_cypress hid_ezkey hid_generic hid_gyration
hid_logitech hid_logitech_dj hid_microsoft hid_monterey hid_petalynx
hid_pl hid_samsung hid_sony hid_sunplus hwmon_vid i2c_algo_bit i2c_i801
i2c_smbus igb input_leds intel_rapl ip6_udp_tunnel ipv6 irqbypass
iscsi_tcp iTCO_vendor_support iTCO_wdt ixgb ixgbe jfs kvm kvm_intel
libahci libata libcrc32c libiscsi libiscsi_tcp linear lockd lpc_ich lpfc
lrw macvlan mdio md_mod megaraid_mbox megaraid_mm megaraid_sas mii
mptbase mptfc mptsas mptscsih mptspi multipath nfs nfs_acl nfsd
nls_cp437 nls_iso8859_1 nvram ohci_hcd pata_jmicron pata_marvell
pata_platform pcspkr psmouse qla1280 qla2xxx r8169 radeon raid0 raid10
raid1 raid456 raid6_pq reiserfs rfkill sata_mv sata_sil24
scsi_transport_fc scsi_transport_iscsi scsi_transport_sas
scsi_transport_spi sd_mod sg sky2 snd snd_hda_codec
snd_hda_codec_generic snd_hda_codec_hdmi snd_hda_codec_realtek
snd_hda_core snd_hda_intel snd_hwdep snd_pcm snd_timer soundcore sr_mod
sunrpc syscopyarea sysfillrect sysimgblt tg3 ttm uas udp_tunnel
usb_storage vfat virtio virtio_net virtio_ring vxlan w83627ehf
x86_pkg_temp_thermal xfs xhci_hcd xhci_pci xor zlib_deflate



-- 
Robin Hugh Johnson
Gentoo Linux: Dev, Infra Lead, Foundation Trustee & Treasurer
E-Mail   : robb...@gentoo.org
GnuPG FP : 11ACBA4F 4778E3F6 E4EDF38E B27B944E 34884E85
GnuPG FP : 7D0B3CEB E9B85B1F 825BCECF EE05E6F6 A48F6136


signature.asc
Description: Digital signature


Re: [PATCH 1/3] thermal: handle get_temp() errors properly

2016-11-18 Thread Caesar Wang

Brian,
在 2016年11月19日 07:52, Brian Norris 写道:

If using CONFIG_THERMAL_EMULATION, there's a corner case where we might
get an error from the zone's get_temp() callback, but we'll ignore that
and keep using its value. Let's just error out properly instead.

Signed-off-by: Brian Norris 

Tested-by: Caesar Wang 

[8.111296] thermal thermal_zone4: power_allocator: sustainable_power 
will be estimated
[8.119420] thermal_zone_get_temp:537 the ret=-19, no such device, 
look like the A/D value had no ready yet.

..
Anyway, this patch is useful for improving thermal.

-Caesar

---
  drivers/thermal/thermal_core.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 911fd964c742..0fa497f10d25 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -494,6 +494,8 @@ int thermal_zone_get_temp(struct thermal_zone_device *tz, 
int *temp)
mutex_lock(>lock);
  
  	ret = tz->ops->get_temp(tz, temp);

+   if (ret)
+   goto exit_unlock;
  
  	if (IS_ENABLED(CONFIG_THERMAL_EMULATION) && tz->emul_temperature) {

for (count = 0; count < tz->trips; count++) {
@@ -514,6 +516,7 @@ int thermal_zone_get_temp(struct thermal_zone_device *tz, 
int *temp)
*temp = tz->emul_temperature;
}
   
+exit_unlock:

mutex_unlock(>lock);
  exit:
return ret;





Re: [PATCH 1/3] thermal: handle get_temp() errors properly

2016-11-18 Thread Caesar Wang

Brian,
在 2016年11月19日 07:52, Brian Norris 写道:

If using CONFIG_THERMAL_EMULATION, there's a corner case where we might
get an error from the zone's get_temp() callback, but we'll ignore that
and keep using its value. Let's just error out properly instead.

Signed-off-by: Brian Norris 

Tested-by: Caesar Wang 

[8.111296] thermal thermal_zone4: power_allocator: sustainable_power 
will be estimated
[8.119420] thermal_zone_get_temp:537 the ret=-19, no such device, 
look like the A/D value had no ready yet.

..
Anyway, this patch is useful for improving thermal.

-Caesar

---
  drivers/thermal/thermal_core.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 911fd964c742..0fa497f10d25 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -494,6 +494,8 @@ int thermal_zone_get_temp(struct thermal_zone_device *tz, 
int *temp)
mutex_lock(>lock);
  
  	ret = tz->ops->get_temp(tz, temp);

+   if (ret)
+   goto exit_unlock;
  
  	if (IS_ENABLED(CONFIG_THERMAL_EMULATION) && tz->emul_temperature) {

for (count = 0; count < tz->trips; count++) {
@@ -514,6 +516,7 @@ int thermal_zone_get_temp(struct thermal_zone_device *tz, 
int *temp)
*temp = tz->emul_temperature;
}
   
+exit_unlock:

mutex_unlock(>lock);
  exit:
return ret;





[PATCH RESEND] sched/rt: Change rt_nr_running to rt_queued in the comment

2016-11-18 Thread T.Zhou
The code actually checks rt_queued not rt_nr_running
in pick_next_task_rt(), so change the corresponding
comment.

Signed-off-by: T.Zhou 
---
 kernel/sched/rt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 2516b8d..9b4a5c5 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1550,7 +1550,7 @@ pick_next_task_rt(struct rq *rq, struct task_struct 
*prev, struct pin_cookie coo
 
/*
 * We may dequeue prev's rt_rq in put_prev_task().
-* So, we update time before rt_nr_running check.
+* So, we update time before rt_queued check.
 */
if (prev->sched_class == _sched_class)
update_curr_rt(rq);
-- 
2.7.3




[PATCH RESEND] sched/rt: Change rt_nr_running to rt_queued in the comment

2016-11-18 Thread T.Zhou
The code actually checks rt_queued not rt_nr_running
in pick_next_task_rt(), so change the corresponding
comment.

Signed-off-by: T.Zhou 
---
 kernel/sched/rt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 2516b8d..9b4a5c5 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -1550,7 +1550,7 @@ pick_next_task_rt(struct rq *rq, struct task_struct 
*prev, struct pin_cookie coo
 
/*
 * We may dequeue prev's rt_rq in put_prev_task().
-* So, we update time before rt_nr_running check.
+* So, we update time before rt_queued check.
 */
if (prev->sched_class == _sched_class)
update_curr_rt(rq);
-- 
2.7.3




Re: [PATCH] sched/rt: Change rt_nr_running to rt_queued in the comment

2016-11-18 Thread T.Zhou
Hi Peter,

On Fri, Nov 18, 2016 at 04:00:57PM +0100, Peter Zijlstra wrote:
> On Fri, Nov 18, 2016 at 09:53:07AM -0500, Steven Rostedt wrote:
> > 
> > Peter,
> > 
> > Can you pull this patch into your queue?
> 
> Not in my inbox..

This patch is so simple that I did not want to interrupt you.
Will resend soon.

Thanks,
T.Zhou



Re: [PATCH] sched/rt: Change rt_nr_running to rt_queued in the comment

2016-11-18 Thread T.Zhou
Hi Peter,

On Fri, Nov 18, 2016 at 04:00:57PM +0100, Peter Zijlstra wrote:
> On Fri, Nov 18, 2016 at 09:53:07AM -0500, Steven Rostedt wrote:
> > 
> > Peter,
> > 
> > Can you pull this patch into your queue?
> 
> Not in my inbox..

This patch is so simple that I did not want to interrupt you.
Will resend soon.

Thanks,
T.Zhou



[PATCH] Tools: hv: kvp: configurable external scripts path

2016-11-18 Thread Alex Fluter

error when running hypervkvpd:
$ sudo ./hv_kvp_daemon -n

sh: hv_get_dns_info: command not found
sh: hv_get_dhcp_info: command not found
sh: hv_get_dns_info: command not found
sh: hv_get_dhcp_info: command not found

The external scripts are not installed in the system path, so add a
configurable macro for their location.

Signed-off-by: Alex Fluter 
---
 tools/hv/hv_kvp_daemon.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index bc7adb8..b1842e7 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -96,6 +96,10 @@ static struct utsname uts_buf;
 
 #define KVP_CONFIG_LOC "/var/lib/hyperv"
 
+#ifndef KVP_SCRIPTS_PATH
+#define KVP_SCRIPTS_PATH "/usr/libexec/hypervkvpd/"
+#endif
+
 #define MAX_FILE_NAME 100
 #define ENTRIES_PER_BLOCK 50
 
@@ -825,7 +829,7 @@ static void kvp_get_ipconfig_info(char *if_name,
 * .
 */
 
-   sprintf(cmd, "%s",  "hv_get_dns_info");
+   sprintf(cmd, KVP_SCRIPTS_PATH "%s",  "hv_get_dns_info");
 
/*
 * Execute the command to gather DNS info.
@@ -842,7 +846,7 @@ static void kvp_get_ipconfig_info(char *if_name,
 * Enabled: DHCP enabled.
 */
 
-   sprintf(cmd, "%s %s", "hv_get_dhcp_info", if_name);
+   sprintf(cmd, KVP_SCRIPTS_PATH "%s %s", "hv_get_dhcp_info", if_name);
 
file = popen(cmd, "r");
if (file == NULL)
@@ -1348,7 +1352,7 @@ static int kvp_set_ip_info(char *if_name, struct 
hv_kvp_ipaddr_value *new_val)
 * invoke the external script to do its magic.
 */
 
-   snprintf(cmd, sizeof(cmd), "%s %s", "hv_set_ifconfig", if_file);
+   snprintf(cmd, sizeof(cmd), KVP_SCRIPTS_PATH "%s %s", "hv_set_ifconfig", 
if_file);
if (system(cmd)) {
syslog(LOG_ERR, "Failed to execute cmd '%s'; error: %d %s",
cmd, errno, strerror(errno));
-- 
2.7.4



[PATCH] Tools: hv: kvp: configurable external scripts path

2016-11-18 Thread Alex Fluter

error when running hypervkvpd:
$ sudo ./hv_kvp_daemon -n

sh: hv_get_dns_info: command not found
sh: hv_get_dhcp_info: command not found
sh: hv_get_dns_info: command not found
sh: hv_get_dhcp_info: command not found

The external scripts are not installed in the system path, so add a
configurable macro for their location.

Signed-off-by: Alex Fluter 
---
 tools/hv/hv_kvp_daemon.c | 10 +++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index bc7adb8..b1842e7 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -96,6 +96,10 @@ static struct utsname uts_buf;
 
 #define KVP_CONFIG_LOC "/var/lib/hyperv"
 
+#ifndef KVP_SCRIPTS_PATH
+#define KVP_SCRIPTS_PATH "/usr/libexec/hypervkvpd/"
+#endif
+
 #define MAX_FILE_NAME 100
 #define ENTRIES_PER_BLOCK 50
 
@@ -825,7 +829,7 @@ static void kvp_get_ipconfig_info(char *if_name,
 * .
 */
 
-   sprintf(cmd, "%s",  "hv_get_dns_info");
+   sprintf(cmd, KVP_SCRIPTS_PATH "%s",  "hv_get_dns_info");
 
/*
 * Execute the command to gather DNS info.
@@ -842,7 +846,7 @@ static void kvp_get_ipconfig_info(char *if_name,
 * Enabled: DHCP enabled.
 */
 
-   sprintf(cmd, "%s %s", "hv_get_dhcp_info", if_name);
+   sprintf(cmd, KVP_SCRIPTS_PATH "%s %s", "hv_get_dhcp_info", if_name);
 
file = popen(cmd, "r");
if (file == NULL)
@@ -1348,7 +1352,7 @@ static int kvp_set_ip_info(char *if_name, struct 
hv_kvp_ipaddr_value *new_val)
 * invoke the external script to do its magic.
 */
 
-   snprintf(cmd, sizeof(cmd), "%s %s", "hv_set_ifconfig", if_file);
+   snprintf(cmd, sizeof(cmd), KVP_SCRIPTS_PATH "%s %s", "hv_set_ifconfig", 
if_file);
if (system(cmd)) {
syslog(LOG_ERR, "Failed to execute cmd '%s'; error: %d %s",
cmd, errno, strerror(errno));
-- 
2.7.4



Re: [GIT PULL 1/2] arm64: dts: exynos: Topic dts arm64 for v4.10

2016-11-18 Thread Olof Johansson
On Fri, Nov 18, 2016 at 02:44:34PM +0200, Krzysztof Kozlowski wrote:
> Hi,
> 
> Topic branch with a GIC interrupt fix and cleanup. No conflicts expected.
> No dependencies.
> 
> Best regards,
> Krzysztof
> 
> 
> The following changes since commit 1001354ca34179f3db924eb66672442a173147dc:
> 
>   Linux 4.9-rc1 (2016-10-15 12:17:50 -0700)
> 
> are available in the git repository at:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git 
> tags/samsung-dt64-gic-flags-4.10
> 
> for you to fetch changes up to 86bb573d0b2a8e876a12d1348bd0b6e377c1043e:
> 
>   arm64: dts: exynos: Use human-friendly symbols for interrupt properties in 
> exynos7 (2016-11-03 22:40:39 +0200)
> 
> 
> Topic branch with DT arm64 changes for v4.10.
> 
> Fix invalid GIC interrupt flags - type IRQ_TYPE_NONE is not allowed for GIC
> interrupts.  Although this was working but with error messages like:
> genirq: Setting trigger mode 0 for irq 16 failed
> 
> Use level high interrupt instead of type none.  The choice of level high was
> rather an arbitrary decision hoping it will work on each platform.  Tests
> have shown no issues so far.

Merged, thanks.


-Olof


Re: [GIT PULL 2/2] ARM: dts: exynos: Topic dts for v4.10

2016-11-18 Thread Olof Johansson
On Fri, Nov 18, 2016 at 02:44:35PM +0200, Krzysztof Kozlowski wrote:
> Hi,
> 
> 
> Topic branch with a GIC interrupt fixes and cleanup (human-friendly symbols).
> 
> Possible rather trivial conflicts:
> 
> diff --cc arch/arm/boot/dts/exynos4x12.dtsi
> index 0074f566cd3b,505f047e81c6..85a7122658f1
> --- a/arch/arm/boot/dts/exynos4x12.dtsi
> +++ b/arch/arm/boot/dts/exynos4x12.dtsi
> @@@ -147,9 -147,10 +147,10 @@@
> };
>   
> fimc_is: fimc-is@1200 {
>  -  compatible = "samsung,exynos4212-fimc-is", 
> "simple-bus";
>  +  compatible = "samsung,exynos4212-fimc-is";
> reg = <0x1200 0x26>;
> -   interrupts = <0 90 0>, <0 95 0>;
> +   interrupts = ,
> +;
> power-domains = <_isp>;
> clocks = < CLK_FIMC_LITE0>,
>  < CLK_FIMC_LITE1>, < 
> CLK_PPMUISPX>,
> 
> diff --cc arch/arm/boot/dts/exynos5410.dtsi
> index 9a91685d8890,2501249d97aa..bb90bbdbe2d9
> --- a/arch/arm/boot/dts/exynos5410.dtsi
> +++ b/arch/arm/boot/dts/exynos5410.dtsi
> @@@ -190,58 -181,8 +190,58 @@@
> pinctrl_3: pinctrl@0386 {
> compatible = "samsung,exynos5410-pinctrl";
> reg = <0x0386 0x1000>;
> -   interrupts = <0 47 0>;
> +   interrupts = ;
> };
>  +
>  +  amba {
>  +  #address-cells = <1>;
>  +  #size-cells = <1>;
>  +  compatible = "simple-bus";
>  +  interrupt-parent = <>;
>  +  ranges;
> 
> 
> Best regards,
> Krzysztof
> 
> 
> The following changes since commit 1001354ca34179f3db924eb66672442a173147dc:
> 
>   Linux 4.9-rc1 (2016-10-15 12:17:50 -0700)
> 
> are available in the git repository at:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git 
> tags/samsung-dt-gic-flags-4.10
> 
> for you to fetch changes up to 04a886727ca7e841afa2fbc5d87aff81ae256dbf:
> 
>   ARM: dts: exynos: Use human-friendly symbols for interrupt properties in 
> exynos5440 (2016-11-03 22:44:56 +0200)
> 
> 
> Topic branch with DT changes for v4.10.
> 
> Fix invalid GIC interrupt flags - type IRQ_TYPE_NONE is not allowed for GIC
> interrupts.  Although this was working but with error messages like:
>   genirq: Setting trigger mode 0 for irq 16 failed
> 
> Use level high interrupt instead of type none.  The choice of level high was
> rather an arbitrary decision hoping it will work on each platform.  Tests
> have shown no issues so far.

Merged, thanks. FYI, you could have based this on the previous DT branch
if you wanted to not expose the conflicts, but simple ones like these
(in particular that we can resolve in our own tree) are OK to expose too.


-Olof


Re: [GIT PULL 1/2] arm64: dts: exynos: Topic dts arm64 for v4.10

2016-11-18 Thread Olof Johansson
On Fri, Nov 18, 2016 at 02:44:34PM +0200, Krzysztof Kozlowski wrote:
> Hi,
> 
> Topic branch with a GIC interrupt fix and cleanup. No conflicts expected.
> No dependencies.
> 
> Best regards,
> Krzysztof
> 
> 
> The following changes since commit 1001354ca34179f3db924eb66672442a173147dc:
> 
>   Linux 4.9-rc1 (2016-10-15 12:17:50 -0700)
> 
> are available in the git repository at:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git 
> tags/samsung-dt64-gic-flags-4.10
> 
> for you to fetch changes up to 86bb573d0b2a8e876a12d1348bd0b6e377c1043e:
> 
>   arm64: dts: exynos: Use human-friendly symbols for interrupt properties in 
> exynos7 (2016-11-03 22:40:39 +0200)
> 
> 
> Topic branch with DT arm64 changes for v4.10.
> 
> Fix invalid GIC interrupt flags - type IRQ_TYPE_NONE is not allowed for GIC
> interrupts.  Although this was working but with error messages like:
> genirq: Setting trigger mode 0 for irq 16 failed
> 
> Use level high interrupt instead of type none.  The choice of level high was
> rather an arbitrary decision hoping it will work on each platform.  Tests
> have shown no issues so far.

Merged, thanks.


-Olof


Re: [GIT PULL 2/2] ARM: dts: exynos: Topic dts for v4.10

2016-11-18 Thread Olof Johansson
On Fri, Nov 18, 2016 at 02:44:35PM +0200, Krzysztof Kozlowski wrote:
> Hi,
> 
> 
> Topic branch with a GIC interrupt fixes and cleanup (human-friendly symbols).
> 
> Possible rather trivial conflicts:
> 
> diff --cc arch/arm/boot/dts/exynos4x12.dtsi
> index 0074f566cd3b,505f047e81c6..85a7122658f1
> --- a/arch/arm/boot/dts/exynos4x12.dtsi
> +++ b/arch/arm/boot/dts/exynos4x12.dtsi
> @@@ -147,9 -147,10 +147,10 @@@
> };
>   
> fimc_is: fimc-is@1200 {
>  -  compatible = "samsung,exynos4212-fimc-is", 
> "simple-bus";
>  +  compatible = "samsung,exynos4212-fimc-is";
> reg = <0x1200 0x26>;
> -   interrupts = <0 90 0>, <0 95 0>;
> +   interrupts = ,
> +;
> power-domains = <_isp>;
> clocks = < CLK_FIMC_LITE0>,
>  < CLK_FIMC_LITE1>, < 
> CLK_PPMUISPX>,
> 
> diff --cc arch/arm/boot/dts/exynos5410.dtsi
> index 9a91685d8890,2501249d97aa..bb90bbdbe2d9
> --- a/arch/arm/boot/dts/exynos5410.dtsi
> +++ b/arch/arm/boot/dts/exynos5410.dtsi
> @@@ -190,58 -181,8 +190,58 @@@
> pinctrl_3: pinctrl@0386 {
> compatible = "samsung,exynos5410-pinctrl";
> reg = <0x0386 0x1000>;
> -   interrupts = <0 47 0>;
> +   interrupts = ;
> };
>  +
>  +  amba {
>  +  #address-cells = <1>;
>  +  #size-cells = <1>;
>  +  compatible = "simple-bus";
>  +  interrupt-parent = <>;
>  +  ranges;
> 
> 
> Best regards,
> Krzysztof
> 
> 
> The following changes since commit 1001354ca34179f3db924eb66672442a173147dc:
> 
>   Linux 4.9-rc1 (2016-10-15 12:17:50 -0700)
> 
> are available in the git repository at:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/krzk/linux.git 
> tags/samsung-dt-gic-flags-4.10
> 
> for you to fetch changes up to 04a886727ca7e841afa2fbc5d87aff81ae256dbf:
> 
>   ARM: dts: exynos: Use human-friendly symbols for interrupt properties in 
> exynos5440 (2016-11-03 22:44:56 +0200)
> 
> 
> Topic branch with DT changes for v4.10.
> 
> Fix invalid GIC interrupt flags - type IRQ_TYPE_NONE is not allowed for GIC
> interrupts.  Although this was working but with error messages like:
>   genirq: Setting trigger mode 0 for irq 16 failed
> 
> Use level high interrupt instead of type none.  The choice of level high was
> rather an arbitrary decision hoping it will work on each platform.  Tests
> have shown no issues so far.

Merged, thanks. FYI, you could have based this on the previous DT branch
if you wanted to not expose the conflicts, but simple ones like these
(in particular that we can resolve in our own tree) are OK to expose too.


-Olof


[PATCH] drm/panel: simple: add 8bit-bps for Sharp lq123p1jx31

2016-11-18 Thread zain wang
The Sharp lq123p1jx31 panel supports 8 bits per color (bpc).

Signed-off-by: zain wang 
---
 drivers/gpu/drm/panel/panel-simple.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/panel/panel-simple.c 
b/drivers/gpu/drm/panel/panel-simple.c
index 113db3c..6b0c026 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -1409,6 +1409,7 @@ static const struct drm_display_mode 
sharp_lq123p1jx31_mode = {
 static const struct panel_desc sharp_lq123p1jx31 = {
.modes = &sharp_lq123p1jx31_mode,
.num_modes = 1,
+   .bpc = 8,
.size = {
.width = 259,
.height = 173,
-- 
1.9.1




[PATCH] drm/panel: simple: add 8bit-bps for Sharp lq123p1jx31

2016-11-18 Thread zain wang
Sharp lq123p1jx31 support 8bit bps.

Signed-off-by: zain wang 
---
 drivers/gpu/drm/panel/panel-simple.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/gpu/drm/panel/panel-simple.c 
b/drivers/gpu/drm/panel/panel-simple.c
index 113db3c..6b0c026 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -1409,6 +1409,7 @@ static const struct drm_display_mode 
sharp_lq123p1jx31_mode = {
 static const struct panel_desc sharp_lq123p1jx31 = {
.modes = &sharp_lq123p1jx31_mode,
.num_modes = 1,
+   .bpc = 8,
.size = {
.width = 259,
.height = 173,
-- 
1.9.1




Re: [PATCH] infiniband: remove WARN that is not kernel bug

2016-11-18 Thread Valdis . Kletnieks
On Fri, 18 Nov 2016 12:24:37 +0100, Dmitry Vyukov said:
> WARNINGs mean kernel bugs.
> The one in ucma_write() points to user programming error
> or a malicious attempt. This is not a kernel bug, remove it.

> - if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
> + if (!ib_safe_file_access(filp))
>   return -EACCES;

In that case, wouldn't this be better?

if (!ib_safe_file_access(filp)) {
printk_once("Process %d (%s) tried to do something hinky", pid, 
comm);
return -EACCES;
}

so the sysadmin becomes aware of the malicious attempt?


pgp2N2mVz1yBo.pgp
Description: PGP signature


Re: [PATCH] infiniband: remove WARN that is not kernel bug

2016-11-18 Thread Valdis . Kletnieks
On Fri, 18 Nov 2016 12:24:37 +0100, Dmitry Vyukov said:
> WARNINGs mean kernel bugs.
> The one in ucma_write() points to user programming error
> or a malicious attempt. This is not a kernel bug, remove it.

> - if (WARN_ON_ONCE(!ib_safe_file_access(filp)))
> + if (!ib_safe_file_access(filp))
>   return -EACCES;

In that case, wouldn't this be better?

if (!ib_safe_file_access(filp)) {
printk_once("Process %d (%s) tried to do something hinky", pid, 
comm);
return -EACCES;
}

so the sysadmin becomes aware of the malicious attempt?


pgp2N2mVz1yBo.pgp
Description: PGP signature


Re: [PATCH] arm64: dts: qcom: msm8996: Fixup smp2p node

2016-11-18 Thread Sarangdhar Joshi

On 11/18/2016 12:06 PM, Bjorn Andersson wrote:

The SMEM state property name changes between the integration branch and
mainline, update to use the correct one.

Fixes: 2f45d9fcd531 ("arm64: dts: msm8996: Add SMP2P and APCS nodes")
Signed-off-by: Bjorn Andersson 


Thanks for the change and sorry for missing it earlier.

Reviewed-by: Sarangdhar Joshi 



Re: [PATCH] arm64: dts: qcom: msm8996: Fixup smp2p node

2016-11-18 Thread Sarangdhar Joshi

On 11/18/2016 12:06 PM, Bjorn Andersson wrote:

The SMEM state property name changes between the integration branch and
mainline, update to use the correct one.

Fixes: 2f45d9fcd531 ("arm64: dts: msm8996: Add SMP2P and APCS nodes")
Signed-off-by: Bjorn Andersson 


Thanks for the change and sorry for missing it earlier.

Reviewed-by: Sarangdhar Joshi 



Re: [PATCH] ARM: qcom_defconfig: Enable RPM/RPM-SMD clocks

2016-11-18 Thread Olof Johansson
On Thu, Nov 17, 2016 at 05:20:26PM +0200, Georgi Djakov wrote:
> Enable support for clocks, controlled by the RPM processor on
> Qualcomm platforms.
> 
> Signed-off-by: Georgi Djakov 

Hi,

Please send patches like these to the platform maintainer (i.e. Andy
Gross), and leave a...@kernel.org off of Cc.

We prefer that only maintainers email us at this address to avoid getting
cc:d on everything (and dealing with the volume of email that entails).


Thanks!

-Olof



Re: [PATCH] ARM: qcom_defconfig: Enable RPM/RPM-SMD clocks

2016-11-18 Thread Olof Johansson
On Thu, Nov 17, 2016 at 05:20:26PM +0200, Georgi Djakov wrote:
> Enable support for clocks, controlled by the RPM processor on
> Qualcomm platforms.
> 
> Signed-off-by: Georgi Djakov 

Hi,

Please send patches like these to the platform maintainer (i.e. Andy
Gross), and leave a...@kernel.org off of Cc.

We prefer that only maintainers email us at this address to avoid getting
cc:d on everything (and dealing with the volume of email that entails).


Thanks!

-Olof



[PATCH] mfd: cpcap: Add minimal support

2016-11-18 Thread Tony Lindgren
Many Motorola phones like droid 4 are using a custom PMIC called CPCAP
or 6556002. We can support its core features quite easily with regmap_spi
and regmap_irq.

The children of cpcap, such as regulators, ADC and USB, can be just regular
device drivers and defined in the dts file. They get probed as we call
of_platform_populate() at the end of our probe, and then the children
can just call dev_get_regmap(dev.parent, NULL) to get the regmap.

Cc: devicet...@vger.kernel.org
Cc: Marcel Partap 
Cc: Mark Rutland 
Cc: Michael Scott 
Cc: Rob Herring 
Signed-off-by: Tony Lindgren 
---
 Documentation/devicetree/bindings/mfd/cpcap.txt |  36 
 drivers/mfd/Kconfig |   8 +
 drivers/mfd/Makefile|   1 +
 drivers/mfd/cpcap.c | 255 
 include/linux/mfd/cpcap.h   | 238 ++
 5 files changed, 538 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/mfd/cpcap.txt
 create mode 100644 drivers/mfd/cpcap.c
 create mode 100644 include/linux/mfd/cpcap.h

diff --git a/Documentation/devicetree/bindings/mfd/cpcap.txt 
b/Documentation/devicetree/bindings/mfd/cpcap.txt
new file mode 100644
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/cpcap.txt
@@ -0,0 +1,36 @@
+CPCAP PMIC device tree binding
+
+Required properties:
+- compatible   : Motorola device "motorola,cpcap", others "st,6556002"
+- reg  : Chip select and size
+- interrupt-parent : The parent interrupt controller
+- interrupts   : The interrupt line the device is connected to
+- interrupt-controller : Marks the device node as an interrupt controller
+- #interrupt-cells : The number of cells to describe an IRQ, should be 2
+- #address-cells   : Child device offset number of cells, typically 1
+- #size-cells  : Child device size number of cells, typically 1
+- ranges   : Child device register range
+- spi-max-frequency: Typically set to 300
+- spi-cs-high  : SPI chip select direction
+
+Example:
+
+ {
+   #address-cells = <1>;
+   #size-cells = <1>;
+   ranges;
+   cpcap: pmic@0 {
+   compatible = "motorola,cpcap", "st,6556002";
+   reg = <0 0>;/* cs0, size 0 */
+   interrupt-parent = <>;
+   interrupts = <7 IRQ_TYPE_EDGE_RISING>;
+   interrupt-controller;
+   #interrupt-cells = <2>;
+   #address-cells = <1>;
+   #size-cells = <1>;
+   ranges = <0 0 0x8000>;
+   spi-max-frequency = <300>;
+   spi-cs-high;
+   };
+};
+
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -713,6 +713,14 @@ config EZX_PCAP
  This enables the PCAP ASIC present on EZX Phones. This is
  needed for MMC, TouchScreen, Sound, USB, etc..
 
+config MFD_CPCAP
+   tristate "Support for CPCAP"
+   depends on SPI && OF
+   help
+ Say yes here if you want to include driver for CPCAP.
+ It is used on many Motorola phones and tablets as a PMIC.
+ At least Motorola Droid 4 is known to use CPCAP.
+
 config MFD_VIPERBOARD
 tristate "Nano River Technologies Viperboard"
select MFD_CORE
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -97,6 +97,7 @@ obj-$(CONFIG_MFD_MC13XXX_I2C) += mc13xxx-i2c.o
 obj-$(CONFIG_MFD_CORE) += mfd-core.o
 
 obj-$(CONFIG_EZX_PCAP) += ezx-pcap.o
+obj-$(CONFIG_MFD_CPCAP)+= cpcap.o
 
 obj-$(CONFIG_MCP)  += mcp-core.o
 obj-$(CONFIG_MCP_SA11X0)   += mcp-sa11x0.o
diff --git a/drivers/mfd/cpcap.c b/drivers/mfd/cpcap.c
new file mode 100644
--- /dev/null
+++ b/drivers/mfd/cpcap.c
@@ -0,0 +1,255 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#define CPCAP_NR_IRQ_BANKS 6
+#define CPCAP_NR_IRQ_DOMAINS   3
+
+struct cpcap_device {
+   struct spi_device *spi;
+   struct device *dev;
+   u16 vendor;
+   u16 revision;
+   const struct cpcap_platform_data *conf;
+   struct regmap_irq *irqs;
+   struct regmap_irq_chip_data *irqdata[CPCAP_NR_IRQ_DOMAINS];
+   const struct regmap_config *regmap_conf;
+   struct regmap *regmap;
+};
+
+static int cpcap_check_revision(struct cpcap_device *cpcap)
+{
+   unsigned int val;
+   int error;
+
+   error = regmap_read(cpcap->regmap, CPCAP_REG_VERSC1, &val);
+   if (error)
+   return error;
+

[PATCH] mfd: cpcap: Add minimal support

2016-11-18 Thread Tony Lindgren
Many Motorola phones like droid 4 are using a custom PMIC called CPCAP
or 6556002. We can support its core features quite easily with regmap_spi
and regmap_irq.

The children of cpcap, such as regulators, ADC and USB, can be just regular
device drivers and defined in the dts file. They get probed as we call
of_platform_populate() at the end of our probe, and then the children
can just call dev_get_regmap(dev.parent, NULL) to get the regmap.

Cc: devicet...@vger.kernel.org
Cc: Marcel Partap 
Cc: Mark Rutland 
Cc: Michael Scott 
Cc: Rob Herring 
Signed-off-by: Tony Lindgren 
---
 Documentation/devicetree/bindings/mfd/cpcap.txt |  36 
 drivers/mfd/Kconfig |   8 +
 drivers/mfd/Makefile|   1 +
 drivers/mfd/cpcap.c | 255 
 include/linux/mfd/cpcap.h   | 238 ++
 5 files changed, 538 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/mfd/cpcap.txt
 create mode 100644 drivers/mfd/cpcap.c
 create mode 100644 include/linux/mfd/cpcap.h

diff --git a/Documentation/devicetree/bindings/mfd/cpcap.txt 
b/Documentation/devicetree/bindings/mfd/cpcap.txt
new file mode 100644
--- /dev/null
+++ b/Documentation/devicetree/bindings/mfd/cpcap.txt
@@ -0,0 +1,36 @@
+CPCAP PMIC device tree binding
+
+Required properties:
+- compatible   : Motorola device "motorola,cpcap", others "st,6556002"
+- reg  : Chip select and size
+- interrupt-parent : The parent interrupt controller
+- interrupts   : The interrupt line the device is connected to
+- interrupt-controller : Marks the device node as an interrupt controller
+- #interrupt-cells : The number of cells to describe an IRQ, should be 2
+- #address-cells   : Child device offset number of cells, typically 1
+- #size-cells  : Child device size number of cells, typically 1
+- ranges   : Child device register range
+- spi-max-frequency: Typically set to 300
+- spi-cs-high  : SPI chip select direction
+
+Example:
+
+ {
+   #address-cells = <1>;
+   #size-cells = <1>;
+   ranges;
+   cpcap: pmic@0 {
+   compatible = "motorola,cpcap", "st,6556002";
+   reg = <0 0>;/* cs0, size 0 */
+   interrupt-parent = <>;
+   interrupts = <7 IRQ_TYPE_EDGE_RISING>;
+   interrupt-controller;
+   #interrupt-cells = <2>;
+   #address-cells = <1>;
+   #size-cells = <1>;
+   ranges = <0 0 0x8000>;
+   spi-max-frequency = <300>;
+   spi-cs-high;
+   };
+};
+
diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -713,6 +713,14 @@ config EZX_PCAP
  This enables the PCAP ASIC present on EZX Phones. This is
  needed for MMC, TouchScreen, Sound, USB, etc..
 
+config MFD_CPCAP
+   tristate "Support for CPCAP"
+   depends on SPI && OF
+   help
+ Say yes here if you want to include driver for CPCAP.
+ It is used on many Motorola phones and tablets as a PMIC.
+ At least Motorola Droid 4 is known to use CPCAP.
+
 config MFD_VIPERBOARD
 tristate "Nano River Technologies Viperboard"
select MFD_CORE
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -97,6 +97,7 @@ obj-$(CONFIG_MFD_MC13XXX_I2C) += mc13xxx-i2c.o
 obj-$(CONFIG_MFD_CORE) += mfd-core.o
 
 obj-$(CONFIG_EZX_PCAP) += ezx-pcap.o
+obj-$(CONFIG_MFD_CPCAP)+= cpcap.o
 
 obj-$(CONFIG_MCP)  += mcp-core.o
 obj-$(CONFIG_MCP_SA11X0)   += mcp-sa11x0.o
diff --git a/drivers/mfd/cpcap.c b/drivers/mfd/cpcap.c
new file mode 100644
--- /dev/null
+++ b/drivers/mfd/cpcap.c
@@ -0,0 +1,255 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+#define CPCAP_NR_IRQ_BANKS 6
+#define CPCAP_NR_IRQ_DOMAINS   3
+
+struct cpcap_device {
+   struct spi_device *spi;
+   struct device *dev;
+   u16 vendor;
+   u16 revision;
+   const struct cpcap_platform_data *conf;
+   struct regmap_irq *irqs;
+   struct regmap_irq_chip_data *irqdata[CPCAP_NR_IRQ_DOMAINS];
+   const struct regmap_config *regmap_conf;
+   struct regmap *regmap;
+};
+
+static int cpcap_check_revision(struct cpcap_device *cpcap)
+{
+   unsigned int val;
+   int error;
+
+   error = regmap_read(cpcap->regmap, CPCAP_REG_VERSC1, &val);
+   if (error)
+   return error;
+
+   cpcap->vendor = (val >> 6) & 0x0007;
+   cpcap->revision = ((val >> 3) & 0x0007) | ((val 

Re: [RFC PATCH] SRCU: More efficient reader counts.

2016-11-18 Thread Lance Roy
On Fri, 18 Nov 2016 15:13:45 -0800
"Paul E. McKenney"  wrote:
> On Fri, Nov 18, 2016 at 12:33:00PM -0800, Lance Roy wrote:  
> > The trouble is that disabling preemption is not enough to ensure that there
> > is at most one srcu_read_lock() call per CPU that missed the srcu_flip().
> > 
> > Define a reader to be an SRCU lock+unlock pair. A reader is called active
> > if it has incremented ->lock_count[] but hasn't incremented  
> > ->unlock_count[] yet, and completed if it has incremented
> > ->unlock_count[]. I think that we only want to limit the number of active
> > readers and the number of CPUs. In particular, I don't think there should
> > be a limit on the rate of completion of read side critical section.
> > 
> > The goal of srcu_readers_active_idx_check() is to verify that there were
> > zero active readers on the inactive index at some time during its
> > execution. To do this, it totals the unlock counts, executes a memory
> > barrier, totals the lock counts, and takes the difference. This difference
> > counts the readers that are active when srcu_readers_lock_idx() gets to
> > their CPU, plus the readers that completed after srcu_readers_unlock_idx()
> > and before srcu_readers_lock_idx(). If the true (infinite precision) value
> > of the difference is zero, then there were no active readers at some point
> > while srcu_readers_lock_idx() is running. However, the difference is only
> > stored in a long, so there is a potential for overflow if too many readers
> > complete during srcu_readers_active_idx_check().
> > 
> > For example, let's say there are three threads, each running on their own
> > CPU:
> > 
> > int data, flag;
> > struct srcu_struct *sp = /* ... */;
> > 
> > Thread 0:
> > data = 1;
> > synchronize_srcu(sp);
> > flag = 1;
> > 
> > Thread 1:
> > int data_r, flag_r;
> > int idx = srcu_read_lock(sp);
> > data_r = data;
> > flag_r = flag;
> > srcu_read_unlock(sp, idx);
> > BUG_ON(flag_r == 1 && data_r == 0);
> > 
> > Thread 2:
> > while (true) {
> > int idx = srcu_read_lock(sp);
> > srcu_read_unlock(sp, idx);
> > }
> > 
> > Let's take the following execution order. Thread 1 increments
> > the CPU 1 version of sp->lock_count[0], sets idx to zero, and loads data (0)
> > into data_r. Thread 0 then sets data to be 1, verifies that there are no
> > readers on index 1, and increments sp->completed, but the CPU actually
> > doesn't perform the last operation, putting it off until the next memory
> > barrier. Thread 0 then calls srcu_readers_active_idx_check() on index 0,
> > which runs srcu_readers_unlock_idx() (returning 0). Right after
> > srcu_readers_unlock_idx() completes, thread 2 starts running. Since Thread
> > 0 hasn't actually incremented sp->completed in any way that is visible to
> > thread 2, srcu_read_lock() will still return 0. Thread 2 can then run for
> > ULONG_MAX iterations, setting the CPU 2 version of sp->unlock_count[0] to
> > ULONG_MAX. CPU 0 then finally gets around to incrementing sp->completed,
> > runs its memory barrier, and then reads the lock counts: 1, 0, ULONG_MAX.
> > The total of ULONG_MAX+1 will overflow to 0 and compare equal with earlier
> > unlock count. Thread 0 will then think that the grace period is over and
> > set flag to one. Thread 1 can then read flag (1) into flag_r and run
> > srcu_read_unlock(). The BUG_ON statement will then fail.
> > 
> > Although ULONG_MAX readers completed during srcu_readers_active_idx_check(),
> > there were at most 2 active readers at any time, so this program doesn't run
> > into any limit.
> > 
> > I hope that was clear enough.
> 
> Indeed it is!
> 
> So adding a full memory barrier immediately after the srcu_flip() should
> prevent this, because if the updater failed to see an unlock increment,
> the second following lock for that CPU/task would be guaranteed to see
> the flip.  Or am I still missing something?  
Yes, adding a full memory barrier after srcu_flip() prevents this problem.

> Is there a sequence of events that requires a full memory barrier
> before the srcu_flip()?  
I am now unsure if a memory barrier before srcu_flip() is necessary. I thought
that it would be needed to prevent the CPU from performing the increment early,
but I have just noticed that srcu_advance_batches() will return early if the
first try_check_zero() fails, creating a control dependency. I think that this
control dependency should be enough to prevent the problem from occurring.

One interesting thing about this version is that there is only an address
dependency between the load of ->completed and the increment of ->lock_count[].
This means that without an smp_read_barrier_depends() between the two, a reader
could use the new value of ->completed to increment ->lock_count[], before
actually reading ->completed. (I was surprised that this ordering doesn't come
for free, as it seems like violating it would require speculative 

Re: [RFC PATCH] SRCU: More efficient reader counts.

2016-11-18 Thread Lance Roy
On Fri, 18 Nov 2016 15:13:45 -0800
"Paul E. McKenney"  wrote:
> On Fri, Nov 18, 2016 at 12:33:00PM -0800, Lance Roy wrote:  
> > The trouble is that disabling preemption is not enough to ensure that there
> > is at most one srcu_read_lock() call per CPU that missed the srcu_flip().
> > 
> > Define a reader to be an SRCU lock+unlock pair. A reader is called active
> > if it has incremented ->lock_count[] but hasn't incremented  
> > ->unlock_count[] yet, and completed if it has incremented
> > ->unlock_count[]. I think that we only want to limit the number of active
> > readers and the number of CPUs. In particular, I don't think there should
> > be a limit on the rate of completion of read side critical section.
> > 
> > The goal of srcu_readers_active_idx_check() is to verify that there were
> > zero active readers on the inactive index at some time during its
> > execution. To do this, it totals the unlock counts, executes a memory
> > barrier, totals the lock counts, and takes the difference. This difference
> > counts the readers that are active when srcu_readers_lock_idx() gets to
> > their CPU, plus the readers that completed after srcu_readers_unlock_idx()
> > and before srcu_readers_lock_idx(). If the true (infinite precision) value
> > of the difference is zero, then there were no active readers at some point
> > while srcu_readers_lock_idx() is running. However, the difference is only
> > stored in a long, so there is a potential for overflow if too many readers
> > complete during srcu_readers_active_idx_check().
> > 
> > For example, let's say there are three threads, each running on their own
> > CPU:
> > 
> > int data, flag;
> > struct srcu_struct *sp = /* ... */;
> > 
> > Thread 0:
> > data = 1;
> > synchronize_srcu(sp);
> > flag = 1;
> > 
> > Thread 1:
> > int data_r, flag_r;
> > int idx = srcu_read_lock(sp);
> > data_r = data;
> > flag_r = flag;
> > srcu_read_unlock(sp, idx);
> > BUG_ON(flag_r == 1 && data_r == 0);
> > 
> > Thread 2:
> > while (true) {
> > int idx = srcu_read_lock(sp);
> > srcu_read_unlock(sp, idx);
> > }
> > 
> > Let's take the following execution order. Thread 1 increments
> > the CPU 1 version of sp->lock_count[0], sets idx to zero, and loads data (0)
> > into data_r. Thread 0 then sets data to be 1, verifies that there are no
> > readers on index 1, and increments sp->completed, but the CPU actually
> > doesn't perform the last operation, putting it off until the next memory
> > barrier. Thread 0 then calls srcu_readers_active_idx_check() on index 0,
> > which runs srcu_readers_unlock_idx() (returning 0). Right after
> > srcu_readers_unlock_idx() completes, thread 2 starts running. Since Thread
> > 0 hasn't actually incremented sp->completed in any way that is visible to
> > thread 2, srcu_read_lock() will still return 0. Thread 2 can then run for
> > ULONG_MAX iterations, setting the CPU 2 version of sp->unlock_count[0] to
> > ULONG_MAX. CPU 0 then finally gets around to incrementing sp->completed,
> > runs its memory barrier, and then reads the lock counts: 1, 0, ULONG_MAX.
> > The total of ULONG_MAX+1 will overflow to 0 and compare equal with earlier
> > unlock count. Thread 0 will then think that the grace period is over and
> > set flag to one. Thread 1 can then read flag (1) into flag_r and run
> > srcu_read_unlock(). The BUG_ON statement will then fail.
> > 
> > Although ULONG_MAX readers completed during srcu_readers_active_idx_check(),
> > there were at most 2 active readers at any time, so this program doesn't run
> > into any limit.
> > 
> > I hope that was clear enough.
> 
> Indeed it is!
> 
> So adding a full memory barrier immediately after the srcu_flip() should
> prevent this, because if the updater failed to see an unlock increment,
> the second following lock for that CPU/task would be guaranteed to see
> the flip.  Or am I still missing something?  
Yes, adding a full memory barrier after srcu_flip() prevents this problem.

> Is there a sequence of events that requires a full memory barrier
> before the srcu_flip()?  
I am now unsure if a memory barrier before srcu_flip() is necessary. I thought
that it would be needed to prevent the CPU from performing the increment early,
but I have just noticed that srcu_advance_batches() will return early if the
first try_check_zero() fails, creating a control dependency. I think that this
control dependency should be enough to prevent the problem from occurring.

One interesting thing about this version is that there is only an address
dependency between the load of ->completed and the increment of ->lock_count[].
This means that without an smp_read_barrier_depends() between the two, a reader
could use the new value of ->completed to increment ->lock_count[], before
actually reading ->completed. (I was surprised that this ordering doesn't come
for free, as it seems like violating it would require speculative writes.) It
doesn't matter much 

[PATCH v2] of: Fix issue where code would fall through to error case.

2016-11-18 Thread Moritz Fischer
No longer fall through into the error case that prints out
an error if no error (err = 0) occurred.

Fixes: d9181b20a83 ("of: Add back an error message, restructured")
Signed-off-by: Moritz Fischer 
Reviewed-by: Frank Rowand 
---
Hi Rob,

this is Frank's suggestion. I don't have a strong preference one way or the 
other.
Feel free to pick one or the other.

Cheers,

Moritz
---
 drivers/of/resolver.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/of/resolver.c b/drivers/of/resolver.c
index 783bd09..c47404f3 100644
--- a/drivers/of/resolver.c
+++ b/drivers/of/resolver.c
@@ -358,14 +358,14 @@ int of_resolve_phandles(struct device_node *overlay)
 
err = update_usages_of_a_phandle_reference(overlay, prop, 
phandle);
if (err)
-   break;
+   goto err_out;
}
-
-err_out:
-   pr_err("overlay phandle fixup failed: %d\n", err);
 out:
of_node_put(tree_symbols);
-
return err;
+
+err_out:
+   pr_err("overlay phandle fixup failed: %d\n", err);
+   goto out;
 }
 EXPORT_SYMBOL_GPL(of_resolve_phandles);
-- 
2.7.4



[PATCH v2] of: Fix issue where code would fall through to error case.

2016-11-18 Thread Moritz Fischer
No longer fall through into the error case that prints out
an error if no error (err = 0) occurred.

Fixes: d9181b20a83 ("of: Add back an error message, restructured")
Signed-off-by: Moritz Fischer 
Reviewed-by: Frank Rowand 
---
Hi Rob,

this is Frank's suggestion. I don't have a strong preference one way or the 
other.
Feel free to pick one or the other.

Cheers,

Moritz
---
 drivers/of/resolver.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/of/resolver.c b/drivers/of/resolver.c
index 783bd09..c47404f3 100644
--- a/drivers/of/resolver.c
+++ b/drivers/of/resolver.c
@@ -358,14 +358,14 @@ int of_resolve_phandles(struct device_node *overlay)
 
err = update_usages_of_a_phandle_reference(overlay, prop, 
phandle);
if (err)
-   break;
+   goto err_out;
}
-
-err_out:
-   pr_err("overlay phandle fixup failed: %d\n", err);
 out:
of_node_put(tree_symbols);
-
return err;
+
+err_out:
+   pr_err("overlay phandle fixup failed: %d\n", err);
+   goto out;
 }
 EXPORT_SYMBOL_GPL(of_resolve_phandles);
-- 
2.7.4



Re: [HMM v13 00/18] HMM (Heterogeneous Memory Management) v13

2016-11-18 Thread John Hubbard
On Fri, 18 Nov 2016, Jérôme Glisse wrote:

> Cliff note: HMM offers 2 things (each standing on its own). First
> it allows to use device memory transparently inside any process
> without any modifications to process program code. Second it allows
> to mirror process address space on a device.
> 
> Change since v12 is the use of struct page for device memory even if
> the device memory is not accessible by the CPU (because of limitation
> impose by the bus between the CPU and the device).
> 
> Using struct page means that their are minimal changes to core mm
> code. HMM build on top of ZONE_DEVICE to provide struct page, it
> adds new features to ZONE_DEVICE. The first 7 patches implement
> those changes.
> 
> Rest of patchset is divided into 3 features that can each be use
> independently from one another. First is the process address space
> mirroring (patch 9 to 13), this allow to snapshot CPU page table
> and to keep the device page table synchronize with the CPU one.
> 
> Second is a new memory migration helper which allow migration of
> a range of virtual address of a process. This memory migration
> also allow device to use their own DMA engine to perform the copy
> between the source memory and destination memory. This can be
> usefull even outside HMM context in many usecase.
> 
> Third part of the patchset (patch 17-18) is a set of helper to
> register a ZONE_DEVICE node and manage it. It is meant as a
> convenient helper so that device drivers do not each have to
> reimplement over and over the same boiler plate code.
> 
> 
> I am hoping that this can now be consider for inclusion upstream.
> Bottom line is that without HMM we can not support some of the new
> hardware features on x86 PCIE. I do believe we need some solution
> to support those features or we won't be able to use such hardware
> in standard like C++17, OpenCL 3.0 and others.
> 
> I have been working with NVidia to bring up this feature on their
> Pascal GPU. There are real hardware that you can buy today that
> could benefit from HMM. We also intend to leverage this inside the
> open source nouveau driver.
> 

Hi,

We (NVIDIA engineering) have been working closely with Jerome on this for 
several years now, and I wanted to mention that NVIDIA is committed to 
using HMM. We've done initial testing of this patchset on Pascal GPUs (a 
bit more detail below) and it is looking good.
  
The HMM features are a prerequisite to an important part of NVIDIA's 
efforts to make writing code for GPUs (and other page-faulting devices) 
easier--by making it more like writing code for CPUs. A big part of that 
story involves being able to use malloc'd memory transparently everywhere. 
Here's a tiny example (in case it's not obvious from the HMM patchset 
documentation) of HMM in action:

int *p = (int*)malloc(SIZE); *p = 5; /* on the CPU */

x = *p;   /* on a GPU, or on any page-fault-capable device */
   
1. A device page fault occurs because the malloc'd memory was never 
allocated in the device's page tables.
  
2. The device driver receives a page fault interrupt, but fails to 
recognize the address, so it calls into HMM.

3. HMM knows that p is valid on the CPU, and coordinates with the device 
driver to unmap the CPU page, allocate a page on the device, and then 
migrate (copy) the data to the device. This allows full device memory 
bandwidth to be available, which is critical to getting good performance.

a) Alternatively, leave the page on the CPU, and create a device 
PTE to point to that page. This might be done if our performance counters 
show that a page is thrashing.
   
4. The device driver issues a replay-page-fault to the device.
 
5. The device program continues running, and x == 5 now.

When version 1 of this patchset was created (2.5 years ago! in May, 2014), 
one huge concern was that we didn't yet have hardware that could use it.  
But now we do: Pascal GPUs, which have been shipping this year, all 
support replayable page faults.

Testing:

We have done some testing of this latest patchset on Pascal GPUs using our 
nvidia-uvm.ko module (which is open source, separate from the closed 
source nvidia.ko). There is still much more testing to do, of course, but 
basic page mirroring and page migration (between CPU and GPU), and even 
some multi-GPU cases, are all working.

We do think we've found a bug in a corner case that involves invalid GPU 
memory (of course, it's always possible that the bug is on our side), 
which Jerome is investigating now. If you spot the bug by inspection, 
you'll get some major told-you-so points. :)

The performance is looking good on the testing we’ve done so far, too.

thanks,

John Hubbard
NVIDIA Systems Software Engineer

> 
> In this patchset i restricted myself to set of core features what
> is missing:
>   - force read only on CPU for memory duplication and GPU atomic
>   - changes to mmu_notifier for optimization purposes
>   - migration of file back page to device memory

Re: [HMM v13 00/18] HMM (Heterogeneous Memory Management) v13

2016-11-18 Thread John Hubbard
On Fri, 18 Nov 2016, Jérôme Glisse wrote:

> Cliff note: HMM offers 2 things (each standing on its own). First
> it allows to use device memory transparently inside any process
> without any modifications to process program code. Second it allows
> to mirror process address space on a device.
> 
> Change since v12 is the use of struct page for device memory even if
> the device memory is not accessible by the CPU (because of limitation
> impose by the bus between the CPU and the device).
> 
> Using struct page means that their are minimal changes to core mm
> code. HMM build on top of ZONE_DEVICE to provide struct page, it
> adds new features to ZONE_DEVICE. The first 7 patches implement
> those changes.
> 
> Rest of patchset is divided into 3 features that can each be use
> independently from one another. First is the process address space
> mirroring (patch 9 to 13), this allow to snapshot CPU page table
> and to keep the device page table synchronize with the CPU one.
> 
> Second is a new memory migration helper which allow migration of
> a range of virtual address of a process. This memory migration
> also allow device to use their own DMA engine to perform the copy
> between the source memory and destination memory. This can be
> usefull even outside HMM context in many usecase.
> 
> Third part of the patchset (patch 17-18) is a set of helper to
> register a ZONE_DEVICE node and manage it. It is meant as a
> convenient helper so that device drivers do not each have to
> reimplement over and over the same boiler plate code.
> 
> 
> I am hoping that this can now be consider for inclusion upstream.
> Bottom line is that without HMM we can not support some of the new
> hardware features on x86 PCIE. I do believe we need some solution
> to support those features or we won't be able to use such hardware
> in standard like C++17, OpenCL 3.0 and others.
> 
> I have been working with NVidia to bring up this feature on their
> Pascal GPU. There are real hardware that you can buy today that
> could benefit from HMM. We also intend to leverage this inside the
> open source nouveau driver.
> 

Hi,

We (NVIDIA engineering) have been working closely with Jerome on this for 
several years now, and I wanted to mention that NVIDIA is committed to 
using HMM. We've done initial testing of this patchset on Pascal GPUs (a 
bit more detail below) and it is looking good.
  
The HMM features are a prerequisite to an important part of NVIDIA's 
efforts to make writing code for GPUs (and other page-faulting devices) 
easier--by making it more like writing code for CPUs. A big part of that 
story involves being able to use malloc'd memory transparently everywhere. 
Here's a tiny example (in case it's not obvious from the HMM patchset 
documentation) of HMM in action:

int *p = (int*)malloc(SIZE); *p = 5; /* on the CPU */

x = *p;   /* on a GPU, or on any page-fault-capable device */
   
1. A device page fault occurs because the malloc'd memory was never 
allocated in the device's page tables.
  
2. The device driver receives a page fault interrupt, but fails to 
recognize the address, so it calls into HMM.

3. HMM knows that p is valid on the CPU, and coordinates with the device 
driver to unmap the CPU page, allocate a page on the device, and then 
migrate (copy) the data to the device. This allows full device memory 
bandwidth to be available, which is critical to getting good performance.

a) Alternatively, leave the page on the CPU, and create a device 
PTE to point to that page. This might be done if our performance counters 
show that a page is thrashing.
   
4. The device driver issues a replay-page-fault to the device.
 
5. The device program continues running, and x == 5 now.

When version 1 of this patchset was created (2.5 years ago! in May, 2014), 
one huge concern was that we didn't yet have hardware that could use it.  
But now we do: Pascal GPUs, which have been shipping this year, all 
support replayable page faults.

Testing:

We have done some testing of this latest patchset on Pascal GPUs using our 
nvidia-uvm.ko module (which is open source, separate from the closed 
source nvidia.ko). There is still much more testing to do, of course, but 
basic page mirroring and page migration (between CPU and GPU), and even 
some multi-GPU cases, are all working.

We do think we've found a bug in a corner case that involves invalid GPU 
memory (of course, it's always possible that the bug is on our side), 
which Jerome is investigating now. If you spot the bug by inspection, 
you'll get some major told-you-so points. :)

The performance is looking good on the testing we’ve done so far, too.

thanks,

John Hubbard
NVIDIA Systems Software Engineer

> 
> In this patchset I restricted myself to a set of core features. What
> is missing:
>   - force read only on CPU for memory duplication and GPU atomic
>   - changes to mmu_notifier for optimization purposes
>   - migration of file back page to device memory

Re: Long delays creating a netns after deleting one (possibly RCU related)

2016-11-18 Thread Eric Dumazet
On Fri, 2016-11-18 at 16:38 -0800, Jarno Rajahalme wrote:

> This fixes the problem for me, so for whatever it’s worth:
> 
> Tested-by: Jarno Rajahalme 
> 

Thanks for testing !

https://git.kernel.org/cgit/linux/kernel/git/davem/net.git/commit/?id=e88a2766143a27bfe6704b4493b214de4094cf29





Re: Long delays creating a netns after deleting one (possibly RCU related)

2016-11-18 Thread Eric Dumazet
On Fri, 2016-11-18 at 16:38 -0800, Jarno Rajahalme wrote:

> This fixes the problem for me, so for whatever it’s worth:
> 
> Tested-by: Jarno Rajahalme 
> 

Thanks for testing !

https://git.kernel.org/cgit/linux/kernel/git/davem/net.git/commit/?id=e88a2766143a27bfe6704b4493b214de4094cf29





Re: linux-next: manual merge of the sound tree with the jc_docs tree

2016-11-18 Thread Jonathan Corbet
On Fri, 18 Nov 2016 16:22:18 -0800
Jarkko Sakkinen  wrote:

> Given that there is now a directory for TPM rst documentation do you
> still want all changes to your tree or is it sufficient to just cc
> linux-doc?

For stuff in security/tpm?  It makes my life easier if documentation
patches come through my tree, but it's never going to be the case that
they all take that path.  I'm becoming increasingly insistent that changes
to the top-level makefiles and index.rst files need to come this way;
that way, Stephen doesn't need to send me so many polite merge-conflict
emails :)

For the rest, a CC is appreciated so that I know what's going on, but if
it works better for subsystem-specific documentation patches to go
through the relevant subsystem trees, then that's how it should be done.

Thanks,

jon


Re: linux-next: manual merge of the sound tree with the jc_docs tree

2016-11-18 Thread Jonathan Corbet
On Fri, 18 Nov 2016 16:22:18 -0800
Jarkko Sakkinen  wrote:

> Given that there is now a directory for TPM rst documentation do you
> still want all changes to your tree or is it sufficient to just cc
> linux-doc?

For stuff in security/tpm?  It makes my life easier if documentation
patches come through my tree, but it's never going to be the case that
they all take that path.  I'm becoming increasingly insistent that changes
to the top-level makefiles and index.rst files need to come this way;
that way, Stephen doesn't need to send me so many polite merge-conflict
emails :)

For the rest, a CC is appreciated so that I know what's going on, but if
it works better for subsystem-specific documentation patches to go
through the relevant subsystem trees, then that's how it should be done.

Thanks,

jon


[PATCH 03/10] perf, tools: Add support for parsing uncore json files

2016-11-18 Thread Andi Kleen
From: Andi Kleen 

Handle the Unit field, which is needed to find the right PMU for
an event. We call it "pmu" and convert it to the perf pmu name
with an uncore prefix.

Handle the ExtSel field, which just extends the event mask with
an additional bit.

Handle the Filter field which adds parameters to the main event
to configure filtering.

Handle the Unit field which declares the unit the values
should be scaled to (similar to what the kernel exports)

Set up the perpkg field for uncore events so that perf
knows they are per package (similar to what the kernel exports)

Then output the fields into the pmu-events data structures which
are compiled into perf.

Filter out zero fields, except for the event itself.

v2: Fix compilation. Add uncore_ prefix at pre-processing time.
Move eventcode change to separate patch.
Signed-off-by: Andi Kleen 
---
 tools/perf/pmu-events/jevents.c| 74 +++---
 tools/perf/pmu-events/jevents.h|  4 ++-
 tools/perf/pmu-events/pmu-events.h |  3 ++
 tools/perf/util/pmu.c  | 22 +++-
 4 files changed, 90 insertions(+), 13 deletions(-)

diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index d343156b1dea..3a3ab5b17fc5 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -191,6 +191,27 @@ static struct msrmap *lookup_msr(char *map, jsmntok_t *val)
return NULL;
 }
 
+static struct map {
+   const char *json;
+   const char *perf;
+} unit_to_pmu[] = {
+   { "CBO", "uncore_cbox" },
+   { "QPI LL", "uncore_qpi" },
+   { "SBO", "uncore_sbox" },
+   {}
+};
+
+static const char *field_to_perf(struct map *table, char *map, jsmntok_t *val)
+{
+   int i;
+
+   for (i = 0; table[i].json; i++) {
+   if (json_streq(map, val, table[i].json))
+   return table[i].perf;
+   }
+   return NULL;
+}
+
 #define EXPECT(e, t, m) do { if (!(e)) {   \
jsmntok_t *loc = (t);   \
if (!(t)->start && (t) > tokens)\
@@ -272,7 +293,8 @@ static void print_events_table_prefix(FILE *fp, const char 
*tblname)
 }
 
 static int print_events_table_entry(void *data, char *name, char *event,
-   char *desc, char *long_desc)
+   char *desc, char *long_desc,
+   char *pmu, char *unit, char *perpkg)
 {
struct perf_entry_data *pd = data;
FILE *outfp = pd->outfp;
@@ -290,7 +312,12 @@ static int print_events_table_entry(void *data, char 
*name, char *event,
fprintf(outfp, "\t.topic = \"%s\",\n", topic);
if (long_desc && long_desc[0])
fprintf(outfp, "\t.long_desc = \"%s\",\n", long_desc);
-
+   if (pmu)
+   fprintf(outfp, "\t.pmu = \"%s\",\n", pmu);
+   if (unit)
+   fprintf(outfp, "\t.unit = \"%s\",\n", unit);
+   if (perpkg)
+   fprintf(outfp, "\t.perpkg = \"%s\",\n", perpkg);
fprintf(outfp, "},\n");
 
return 0;
@@ -337,7 +364,8 @@ static char *real_event(const char *name, char *event)
 /* Call func with each event in the json file */
 int json_events(const char *fn,
  int (*func)(void *data, char *name, char *event, char *desc,
- char *long_desc),
+ char *long_desc,
+ char *pmu, char *unit, char *perpkg),
  void *data)
 {
int err = -EIO;
@@ -359,6 +387,10 @@ int json_events(const char *fn,
char *event = NULL, *desc = NULL, *name = NULL;
char *long_desc = NULL;
char *extra_desc = NULL;
+   char *pmu = NULL;
+   char *filter = NULL;
+   char *perpkg = NULL;
+   char *unit = NULL;
unsigned long long eventcode = 0;
struct msrmap *msr = NULL;
jsmntok_t *msrval = NULL;
@@ -385,6 +417,11 @@ int json_events(const char *fn,
addfield(map, , "", "", val);
eventcode |= strtoul(code, NULL, 0);
free(code);
+   } else if (json_streq(map, field, "ExtSel")) {
+   char *code = NULL;
+   addfield(map, , "", "", val);
+   eventcode |= strtoul(code, NULL, 0) << 21;
+   free(code);
} else if (json_streq(map, field, "EventName")) {
addfield(map, , "", "", val);
} else if (json_streq(map, field, "BriefDescription")) {
@@ -408,6 +445,28 @@ int json_events(const char *fn,
addfield(map, _desc, ". ",
" Supports address when 

[PATCH 02/10] perf, tools: Parse eventcode as number in jevents

2016-11-18 Thread Andi Kleen
From: Andi Kleen 

The next patch needs to modify event code. Previously eventcode was just
passed through as a string. Now parse it as a number.

Signed-off-by: Andi Kleen 
---
 tools/perf/pmu-events/jevents.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 41611d7f9873..d343156b1dea 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -135,7 +135,6 @@ static struct field {
const char *field;
const char *kernel;
 } fields[] = {
-   { "EventCode",  "event=" },
{ "UMask",  "umask=" },
{ "CounterMask", "cmask=" },
{ "Invert", "inv=" },
@@ -164,6 +163,9 @@ static int match_field(char *map, jsmntok_t *field, int nz,
 
for (f = fields; f->field; f++)
if (json_streq(map, field, f->field) && nz) {
+   if (json_streq(map, val, "0x00") ||
+json_streq(map, val, "0x0"))
+   return 1;
cut_comma(map, );
addfield(map, event, ",", f->kernel, );
return 1;
@@ -343,6 +345,7 @@ int json_events(const char *fn,
jsmntok_t *tokens, *tok;
int i, j, len;
char *map;
+   char buf[128];
 
if (!fn)
return -ENOENT;
@@ -356,6 +359,7 @@ int json_events(const char *fn,
char *event = NULL, *desc = NULL, *name = NULL;
char *long_desc = NULL;
char *extra_desc = NULL;
+   unsigned long long eventcode = 0;
struct msrmap *msr = NULL;
jsmntok_t *msrval = NULL;
jsmntok_t *precise = NULL;
@@ -376,6 +380,11 @@ int json_events(const char *fn,
nz = !json_streq(map, val, "0");
if (match_field(map, field, nz, , val)) {
/* ok */
+   } else if (json_streq(map, field, "EventCode")) {
+   char *code = NULL;
+   addfield(map, , "", "", val);
+   eventcode |= strtoul(code, NULL, 0);
+   free(code);
} else if (json_streq(map, field, "EventName")) {
addfield(map, , "", "", val);
} else if (json_streq(map, field, "BriefDescription")) {
@@ -410,6 +419,8 @@ int json_events(const char *fn,
addfield(map, _desc, " ",
"(Precise event)", NULL);
}
+   snprintf(buf, sizeof buf, "event=%#llx", eventcode);
+   addfield(map, , ",", buf, NULL);
if (desc && extra_desc)
addfield(map, , " ", extra_desc, NULL);
if (long_desc && extra_desc)
-- 
2.5.5



[PATCH 03/10] perf, tools: Add support for parsing uncore json files

2016-11-18 Thread Andi Kleen
From: Andi Kleen 

Handle the Unit field, which is needed to find the right PMU for
an event. We call it "pmu" and convert it to the perf pmu name
with an uncore prefix.

Handle the ExtSel field, which just extends the event mask with
an additional bit.

Handle the Filter field which adds parameters to the main event
to configure filtering.

Handle the Unit field which declares the unit the values
should be scaled to (similar to what the kernel exports)

Set up the perpkg field for uncore events so that perf
knows they are per package (similar to what the kernel exports)

Then output the fields into the pmu-events data structures which
are compiled into perf.

Filter out zero fields, except for the event itself.

v2: Fix compilation. Add uncore_ prefix at pre-processing time.
Move eventcode change to separate patch.
Signed-off-by: Andi Kleen 
---
 tools/perf/pmu-events/jevents.c| 74 +++---
 tools/perf/pmu-events/jevents.h|  4 ++-
 tools/perf/pmu-events/pmu-events.h |  3 ++
 tools/perf/util/pmu.c  | 22 +++-
 4 files changed, 90 insertions(+), 13 deletions(-)

diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index d343156b1dea..3a3ab5b17fc5 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -191,6 +191,27 @@ static struct msrmap *lookup_msr(char *map, jsmntok_t *val)
return NULL;
 }
 
+static struct map {
+   const char *json;
+   const char *perf;
+} unit_to_pmu[] = {
+   { "CBO", "uncore_cbox" },
+   { "QPI LL", "uncore_qpi" },
+   { "SBO", "uncore_sbox" },
+   {}
+};
+
+static const char *field_to_perf(struct map *table, char *map, jsmntok_t *val)
+{
+   int i;
+
+   for (i = 0; table[i].json; i++) {
+   if (json_streq(map, val, table[i].json))
+   return table[i].perf;
+   }
+   return NULL;
+}
+
 #define EXPECT(e, t, m) do { if (!(e)) {   \
jsmntok_t *loc = (t);   \
if (!(t)->start && (t) > tokens)\
@@ -272,7 +293,8 @@ static void print_events_table_prefix(FILE *fp, const char 
*tblname)
 }
 
 static int print_events_table_entry(void *data, char *name, char *event,
-   char *desc, char *long_desc)
+   char *desc, char *long_desc,
+   char *pmu, char *unit, char *perpkg)
 {
struct perf_entry_data *pd = data;
FILE *outfp = pd->outfp;
@@ -290,7 +312,12 @@ static int print_events_table_entry(void *data, char 
*name, char *event,
fprintf(outfp, "\t.topic = \"%s\",\n", topic);
if (long_desc && long_desc[0])
fprintf(outfp, "\t.long_desc = \"%s\",\n", long_desc);
-
+   if (pmu)
+   fprintf(outfp, "\t.pmu = \"%s\",\n", pmu);
+   if (unit)
+   fprintf(outfp, "\t.unit = \"%s\",\n", unit);
+   if (perpkg)
+   fprintf(outfp, "\t.perpkg = \"%s\",\n", perpkg);
fprintf(outfp, "},\n");
 
return 0;
@@ -337,7 +364,8 @@ static char *real_event(const char *name, char *event)
 /* Call func with each event in the json file */
 int json_events(const char *fn,
  int (*func)(void *data, char *name, char *event, char *desc,
- char *long_desc),
+ char *long_desc,
+ char *pmu, char *unit, char *perpkg),
  void *data)
 {
int err = -EIO;
@@ -359,6 +387,10 @@ int json_events(const char *fn,
char *event = NULL, *desc = NULL, *name = NULL;
char *long_desc = NULL;
char *extra_desc = NULL;
+   char *pmu = NULL;
+   char *filter = NULL;
+   char *perpkg = NULL;
+   char *unit = NULL;
unsigned long long eventcode = 0;
struct msrmap *msr = NULL;
jsmntok_t *msrval = NULL;
@@ -385,6 +417,11 @@ int json_events(const char *fn,
addfield(map, , "", "", val);
eventcode |= strtoul(code, NULL, 0);
free(code);
+   } else if (json_streq(map, field, "ExtSel")) {
+   char *code = NULL;
+   addfield(map, , "", "", val);
+   eventcode |= strtoul(code, NULL, 0) << 21;
+   free(code);
} else if (json_streq(map, field, "EventName")) {
addfield(map, , "", "", val);
} else if (json_streq(map, field, "BriefDescription")) {
@@ -408,6 +445,28 @@ int json_events(const char *fn,
addfield(map, _desc, ". ",
" Supports address when precise",
   

[PATCH 02/10] perf, tools: Parse eventcode as number in jevents

2016-11-18 Thread Andi Kleen
From: Andi Kleen 

The next patch needs to modify event code. Previously eventcode was just
passed through as a string. Now parse it as a number.

Signed-off-by: Andi Kleen 
---
 tools/perf/pmu-events/jevents.c | 13 -
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c
index 41611d7f9873..d343156b1dea 100644
--- a/tools/perf/pmu-events/jevents.c
+++ b/tools/perf/pmu-events/jevents.c
@@ -135,7 +135,6 @@ static struct field {
const char *field;
const char *kernel;
 } fields[] = {
-   { "EventCode",  "event=" },
{ "UMask",  "umask=" },
{ "CounterMask", "cmask=" },
{ "Invert", "inv=" },
@@ -164,6 +163,9 @@ static int match_field(char *map, jsmntok_t *field, int nz,
 
for (f = fields; f->field; f++)
if (json_streq(map, field, f->field) && nz) {
+   if (json_streq(map, val, "0x00") ||
+json_streq(map, val, "0x0"))
+   return 1;
cut_comma(map, );
addfield(map, event, ",", f->kernel, );
return 1;
@@ -343,6 +345,7 @@ int json_events(const char *fn,
jsmntok_t *tokens, *tok;
int i, j, len;
char *map;
+   char buf[128];
 
if (!fn)
return -ENOENT;
@@ -356,6 +359,7 @@ int json_events(const char *fn,
char *event = NULL, *desc = NULL, *name = NULL;
char *long_desc = NULL;
char *extra_desc = NULL;
+   unsigned long long eventcode = 0;
struct msrmap *msr = NULL;
jsmntok_t *msrval = NULL;
jsmntok_t *precise = NULL;
@@ -376,6 +380,11 @@ int json_events(const char *fn,
nz = !json_streq(map, val, "0");
if (match_field(map, field, nz, , val)) {
/* ok */
+   } else if (json_streq(map, field, "EventCode")) {
+   char *code = NULL;
+   addfield(map, , "", "", val);
+   eventcode |= strtoul(code, NULL, 0);
+   free(code);
} else if (json_streq(map, field, "EventName")) {
addfield(map, , "", "", val);
} else if (json_streq(map, field, "BriefDescription")) {
@@ -410,6 +419,8 @@ int json_events(const char *fn,
addfield(map, _desc, " ",
"(Precise event)", NULL);
}
+   snprintf(buf, sizeof buf, "event=%#llx", eventcode);
+   addfield(map, , ",", buf, NULL);
if (desc && extra_desc)
addfield(map, , " ", extra_desc, NULL);
if (long_desc && extra_desc)
-- 
2.5.5



Re: Long delays creating a netns after deleting one (possibly RCU related)

2016-11-18 Thread Jarno Rajahalme

> On Nov 14, 2016, at 3:09 PM, Eric Dumazet  wrote:
> 
> On Mon, 2016-11-14 at 14:46 -0800, Eric Dumazet wrote:
>> On Mon, 2016-11-14 at 16:12 -0600, Eric W. Biederman wrote:
>> 
>>> synchronize_rcu_expedited is not enough if you have multiple network
>>> devices in play.
>>> 
>>> Looking at the code it comes down to this commit, and it appears there
>>> is a promise add rcu grace period combining by Eric Dumazet.
>>> 
>>> Eric since people are hitting noticeable stalls because of the rcu grace
>>> period taking a long time do you think you could look at this code path
>>> a bit more?
>>> 
>>> commit 93d05d4a320cb16712bb3d57a9658f395d8cecb9
>>> Author: Eric Dumazet 
>>> Date:   Wed Nov 18 06:31:03 2015 -0800
>> 
>> Absolutely, I will take a look asap.
> 
> The worst offender should be fixed by the following patch.
> 
> busy poll needs to poll the physical device, not a virtual one...
> 
> diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h
> index 
> d15214d673b2e8e08fd6437b572278fb1359f10d..2a1abbf8da74368cd01adc40cef6c0644e059ef2
>  100644
> --- a/include/net/gro_cells.h
> +++ b/include/net/gro_cells.h
> @@ -68,6 +68,9 @@ static inline int gro_cells_init(struct gro_cells *gcells, 
> struct net_device *de
>   struct gro_cell *cell = per_cpu_ptr(gcells->cells, i);
> 
>   __skb_queue_head_init(>napi_skbs);
> +
> + set_bit(NAPI_STATE_NO_BUSY_POLL, >napi.state);
> +
>   netif_napi_add(dev, >napi, gro_cell_poll, 64);
>   napi_enable(>napi);
>   }
> 
> 
> 
> 
> 

This fixes the problem for me, so for whatever it’s worth:

Tested-by: Jarno Rajahalme 



Re: Long delays creating a netns after deleting one (possibly RCU related)

2016-11-18 Thread Jarno Rajahalme

> On Nov 14, 2016, at 3:09 PM, Eric Dumazet  wrote:
> 
> On Mon, 2016-11-14 at 14:46 -0800, Eric Dumazet wrote:
>> On Mon, 2016-11-14 at 16:12 -0600, Eric W. Biederman wrote:
>> 
>>> synchronize_rcu_expedited is not enough if you have multiple network
>>> devices in play.
>>> 
>>> Looking at the code it comes down to this commit, and it appears there
>>> is a promise add rcu grace period combining by Eric Dumazet.
>>> 
>>> Eric since people are hitting noticeable stalls because of the rcu grace
>>> period taking a long time do you think you could look at this code path
>>> a bit more?
>>> 
>>> commit 93d05d4a320cb16712bb3d57a9658f395d8cecb9
>>> Author: Eric Dumazet 
>>> Date:   Wed Nov 18 06:31:03 2015 -0800
>> 
>> Absolutely, I will take a look asap.
> 
> The worst offender should be fixed by the following patch.
> 
> busy poll needs to poll the physical device, not a virtual one...
> 
> diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h
> index 
> d15214d673b2e8e08fd6437b572278fb1359f10d..2a1abbf8da74368cd01adc40cef6c0644e059ef2
>  100644
> --- a/include/net/gro_cells.h
> +++ b/include/net/gro_cells.h
> @@ -68,6 +68,9 @@ static inline int gro_cells_init(struct gro_cells *gcells, 
> struct net_device *de
>   struct gro_cell *cell = per_cpu_ptr(gcells->cells, i);
> 
>   __skb_queue_head_init(>napi_skbs);
> +
> + set_bit(NAPI_STATE_NO_BUSY_POLL, >napi.state);
> +
>   netif_napi_add(dev, >napi, gro_cell_poll, 64);
>   napi_enable(>napi);
>   }
> 
> 
> 
> 
> 

This fixes the problem for me, so for whatever it’s worth:

Tested-by: Jarno Rajahalme 



[PATCH 08/10] perf, tools: Expand PMU events by prefix match

2016-11-18 Thread Andi Kleen
From: Andi Kleen 

When the user specifies a pmu directly, expand it automatically
with a prefix match, similar to what we do for the normal aliases now.

This allows specifying attributes for duplicated boxes quickly.
For example uncore_cbox_{0,6}/.../ can be now specified as cbox/.../
and it gets automatically expanded.

Before

% perf stat -a -e uncore_cbox_0/event=0x35,umask=0x1,filter_opc=0x19C/,\
uncore_cbox_1/event=0x35,umask=0x1,filter_opc=0x19C/,\
uncore_cbox_2/event=0x35,umask=0x1,filter_opc=0x19C/,\
uncore_cbox_3/event=0x35,umask=0x1,filter_opc=0x19C/,\
uncore_cbox_4/event=0x35,umask=0x1,filter_opc=0x19C/,\
uncore_cbox_5/event=0x35,umask=0x1,filter_opc=0x19C/ sleep 1

After

perf stat -a -e cbox/event=0x35,umask=0x1,filter_opc=0x19C/ sleep 1

v2: Handle all bison rules. Move multi add code to separate function.
Handle uncore_ prefix correctly.
Signed-off-by: Andi Kleen 
---
 tools/perf/util/parse-events.c | 71 
 tools/perf/util/parse-events.h |  8 +
 tools/perf/util/parse-events.y | 73 +-
 3 files changed, 109 insertions(+), 43 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 6dbcba7f0969..fba53ba22431 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1257,6 +1257,52 @@ int parse_events_add_pmu(struct parse_events_evlist 
*data,
return evsel ? 0 : -ENOMEM;
 }
 
+int parse_events_multi_pmu_add(struct parse_events_evlist *data,
+  char *str, struct list_head **listp)
+{
+   struct list_head *head;
+   struct parse_events_term *term;
+   struct list_head *list;
+   struct perf_pmu *pmu = NULL;
+   int ok = 0;
+
+   *listp = NULL;
+   /* Add it for all PMUs that support the alias */
+   list = malloc(sizeof(struct list_head));
+   if (!list)
+   return -1;
+   INIT_LIST_HEAD(list);
+   while ((pmu = perf_pmu__scan(pmu)) != NULL) {
+   struct perf_pmu_alias *alias;
+
+   list_for_each_entry(alias, >aliases, list) {
+   if (!strcasecmp(alias->name, str)) {
+   head = malloc(sizeof(struct list_head));
+   if (!head)
+   return -1;
+   INIT_LIST_HEAD(head);
+   if (parse_events_term__num(, 
PARSE_EVENTS__TERM_TYPE_USER,
+  str, 1, , NULL) 
< 0)
+   return -1;
+   list_add_tail(>list, head);
+
+   if (!parse_events_add_pmu(data, list,
+ pmu->name, head)) {
+   pr_debug("%s -> %s/%s/\n", str,
+pmu->name, alias->str);
+   ok++;
+   }
+
+   parse_events_terms__delete(head);
+   }
+   }
+   }
+   if (!ok)
+   return -1;
+   *listp = list;
+   return 0;
+}
+
 int parse_events__modifier_group(struct list_head *list,
 char *event_mod)
 {
@@ -2406,6 +2452,31 @@ int parse_events_term__clone(struct parse_events_term 
**new,
term->err_term, term->err_val);
 }
 
+int parse_events_copy_term_list(struct list_head *old,
+struct list_head **new)
+{
+   struct parse_events_term *term, *n;
+   int ret;
+
+   if (!old) {
+   *new = NULL;
+   return 0;
+   }
+
+   *new = malloc(sizeof(struct list_head));
+   if (!*new)
+   return -ENOMEM;
+   INIT_LIST_HEAD(*new);
+
+   list_for_each_entry (term, old, list) {
+   ret = parse_events_term__clone(, term);
+   if (ret)
+   return ret;
+   list_add_tail(>list, *new);
+   }
+   return 0;
+}
+
 void parse_events_terms__purge(struct list_head *terms)
 {
struct parse_events_term *term, *h;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index da246a3ddb69..33b3e52cd9b8 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -164,6 +164,14 @@ int parse_events_add_breakpoint(struct list_head *list, 
int *idx,
 int parse_events_add_pmu(struct parse_events_evlist *data,
 struct list_head *list, char *name,
 struct list_head *head_config);
+
+int parse_events_multi_pmu_add(struct parse_events_evlist *data,
+  char *str,
+  struct list_head **listp);
+
+int parse_events_copy_term_list(struct list_head 

[PATCH 01/10] perf, tools: Factor out scale conversion code

2016-11-18 Thread Andi Kleen
From: Andi Kleen 

Move the scale factor parsing code to its own function
to reuse it in an upcoming patch.

v2: Return error in case strdup returns NULL.
Signed-off-by: Andi Kleen 

squash! perf, tools: Factor out scale conversion code
---
 tools/perf/util/pmu.c | 70 ++-
 1 file changed, 41 insertions(+), 29 deletions(-)

diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index dc6ccaa4e927..500ab18d8658 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -94,32 +94,10 @@ static int pmu_format(const char *name, struct list_head 
*format)
return 0;
 }
 
-static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char 
*name)
+static int convert_scale(const char *scale, char **end, double *sval)
 {
-   struct stat st;
-   ssize_t sret;
-   char scale[128];
-   int fd, ret = -1;
-   char path[PATH_MAX];
char *lc;
-
-   snprintf(path, PATH_MAX, "%s/%s.scale", dir, name);
-
-   fd = open(path, O_RDONLY);
-   if (fd == -1)
-   return -1;
-
-   if (fstat(fd, ) < 0)
-   goto error;
-
-   sret = read(fd, scale, sizeof(scale)-1);
-   if (sret < 0)
-   goto error;
-
-   if (scale[sret - 1] == '\n')
-   scale[sret - 1] = '\0';
-   else
-   scale[sret] = '\0';
+   int ret = 0;
 
/*
 * save current locale
@@ -133,8 +111,8 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias 
*alias, char *dir, char *
 */
lc = strdup(lc);
if (!lc) {
-   ret = -ENOMEM;
-   goto error;
+   ret = -1;
+   goto out;
}
 
/*
@@ -144,14 +122,42 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias 
*alias, char *dir, char *
 */
setlocale(LC_NUMERIC, "C");
 
-   alias->scale = strtod(scale, NULL);
+   *sval = strtod(scale, end);
 
+out:
/* restore locale */
setlocale(LC_NUMERIC, lc);
-
free(lc);
+   return ret;
+}
 
-   ret = 0;
+static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char 
*name)
+{
+   struct stat st;
+   ssize_t sret;
+   char scale[128];
+   int fd, ret = -1;
+   char path[PATH_MAX];
+
+   snprintf(path, PATH_MAX, "%s/%s.scale", dir, name);
+
+   fd = open(path, O_RDONLY);
+   if (fd == -1)
+   return -1;
+
+   if (fstat(fd, ) < 0)
+   goto error;
+
+   sret = read(fd, scale, sizeof(scale)-1);
+   if (sret < 0)
+   goto error;
+
+   if (scale[sret - 1] == '\n')
+   scale[sret - 1] = '\0';
+   else
+   scale[sret] = '\0';
+
+   ret = convert_scale(scale, NULL, >scale);
 error:
close(fd);
return ret;
@@ -261,6 +267,12 @@ static int __perf_pmu__new_alias(struct list_head *list, 
char *dir, char *name,
alias->long_desc = long_desc ? strdup(long_desc) :
desc ? strdup(desc) : NULL;
alias->topic = topic ? strdup(topic) : NULL;
+   if (unit) {
+   if (convert_scale(unit, , >scale) < 0)
+   return -1;
+   snprintf(alias->unit, sizeof(alias->unit), "%s", unit);
+   }
+   alias->str = strdup(val);
 
list_add_tail(>list, list);
 
-- 
2.5.5



  1   2   3   4   5   6   7   8   9   10   >