Re: [PATCH 4/9] powerpc: inline ip_fast_csum()

2015-09-22 Thread Denis Kirjanov
On 9/22/15, Christophe Leroy  wrote:
> In several architectures, ip_fast_csum() is inlined
> There are functions like ip_send_check() which do nothing
> much more than calling ip_fast_csum().
> Inlining ip_fast_csum() allows the compiler to optimise better

Hi Christophe,
I did try it and see no difference on ppc64. Did you test with socklib
with modified loopback and if so do you have any numbers?
>
> Suggested-by: Eric Dumazet 
> Signed-off-by: Christophe Leroy 
> ---
>  arch/powerpc/include/asm/checksum.h | 46
> +++--
>  arch/powerpc/lib/checksum_32.S  | 21 -
>  arch/powerpc/lib/checksum_64.S  | 27 --
>  arch/powerpc/lib/ppc_ksyms.c|  1 -
>  4 files changed, 39 insertions(+), 56 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/checksum.h
> b/arch/powerpc/include/asm/checksum.h
> index afa6722..56deea8 100644
> --- a/arch/powerpc/include/asm/checksum.h
> +++ b/arch/powerpc/include/asm/checksum.h
> @@ -9,16 +9,9 @@
>   * 2 of the License, or (at your option) any later version.
>   */
>
> -/*
> - * This is a version of ip_compute_csum() optimized for IP headers,
> - * which always checksum on 4 octet boundaries.  ihl is the number
> - * of 32-bit words and is always >= 5.
> - */
>  #ifdef CONFIG_GENERIC_CSUM
>  #include 
>  #else
> -extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
> -
>  /*
>   * computes the checksum of a memory block at buff, length len,
>   * and adds in "sum" (32-bit)
> @@ -137,6 +130,45 @@ static inline __wsum csum_add(__wsum csum, __wsum
> addend)
>  #endif
>  }
>
> +/*
> + * This is a version of ip_compute_csum() optimized for IP headers,
> + * which always checksum on 4 octet boundaries.  ihl is the number
> + * of 32-bit words and is always >= 5.
> + */
> +static inline __wsum ip_fast_csum_nofold(const void *iph, unsigned int
> ihl)
> +{
> + u32 *ptr = (u32 *)iph + 1;
> +#ifdef __powerpc64__
> + unsigned int i;
> + u64 s = *(__force u32 *)iph;
> +
> + for (i = 0; i < ihl - 1; i++, ptr++)
> + s += *ptr;
> + s += (s >> 32);
> + return (__force __wsum)s;
> +
> +#else
> + __wsum sum, tmp;
> +
> + asm("mtctr %3;"
> + "addc %0,%4,%5;"
> + "1:lwzu %1, 4(%2);"
> + "adde %0,%0,%1;"
> + "bdnz 1b;"
> + "addze %0,%0;"
> + : "=r"(sum), "=r"(tmp), "+b"(ptr)
> + : "r"(ihl - 2), "r"(*(u32 *)iph), "r"(*ptr)
> + : "ctr", "xer", "memory");
> +
> + return sum;
> +#endif
> +}
> +
> +static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
> +{
> + return csum_fold(ip_fast_csum_nofold(iph, ihl));
> +}
> +
>  #endif
>  #endif /* __KERNEL__ */
>  #endif
> diff --git a/arch/powerpc/lib/checksum_32.S
> b/arch/powerpc/lib/checksum_32.S
> index 6d67e05..0d7eba3 100644
> --- a/arch/powerpc/lib/checksum_32.S
> +++ b/arch/powerpc/lib/checksum_32.S
> @@ -20,27 +20,6 @@
>   .text
>
>  /*
> - * ip_fast_csum(buf, len) -- Optimized for IP header
> - * len is in words and is always >= 5.
> - */
> -_GLOBAL(ip_fast_csum)
> - lwz r0,0(r3)
> - lwzur5,4(r3)
> - addic.  r4,r4,-2
> - addcr0,r0,r5
> - mtctr   r4
> - blelr-
> -1:   lwzur4,4(r3)
> - adder0,r0,r4
> - bdnz1b
> - addze   r0,r0   /* add in final carry */
> - rlwinm  r3,r0,16,0,31   /* fold two halves together */
> - add r3,r0,r3
> - not r3,r3
> - srwir3,r3,16
> - blr
> -
> -/*
>   * computes the checksum of a memory block at buff, length len,
>   * and adds in "sum" (32-bit)
>   *
> diff --git a/arch/powerpc/lib/checksum_64.S
> b/arch/powerpc/lib/checksum_64.S
> index f3ef354..f53f4ab 100644
> --- a/arch/powerpc/lib/checksum_64.S
> +++ b/arch/powerpc/lib/checksum_64.S
> @@ -18,33 +18,6 @@
>  #include 
>
>  /*
> - * ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header
> - * len is in words and is always >= 5.
> - *
> - * In practice len == 5, but this is not guaranteed.  So this code does
> not
> - * attempt to use doubleword instructions.
> - */
> -_GLOBAL(ip_fast_csum)
> - lwz r0,0(r3)
> - lwzur5,4(r3)
> - addic.  r4,r4,-2
> - addcr0,r0,r5
> - mtctr   r4
> - blelr-
> -1:   lwzur4,4(r3)
> - adder0,r0,r4
> - bdnz1b
> - addze   r0,r0   /* add in final carry */
> -rldicl  r4,r0,32,0  /* fold two 32-bit halves together */
> -add r0,r0,r4
> -srdir0,r0,32
> - rlwinm  r3,r0,16,0,31   /* fold two halves together */
> - add r3,r0,r3
> - not r3,r3
> - srwir3,r3,16
> - blr
> -
> -/*
>   * Computes the checksum of a memory block at buff, length len,
>   * and adds in "sum" (32-bit).
>   *
> diff --git a/arch/powerpc/lib/ppc_ksyms.c b/arch/powerpc/lib/ppc_ksyms.c
> index f5e427e..8cd5c0b 100644
> --- a/arch/powerpc/lib/ppc_ksyms.c
> +++ b/arch/powerpc/lib/ppc_ksyms.c
> @@ -19,7 +19,6 @@ 

Re: [PATCH] kexec: Add prefix "kexec" to output message

2015-09-22 Thread Minfei Huang
On 09/23/15 at 01:26pm, Baoquan He wrote:
> On 09/23/15 at 01:14pm, Minfei Huang wrote:
> > On 09/23/15 at 10:49am, Baoquan He wrote:
> > > This is weird, user really don't need to know each file. I saw you added
> > > a new file kexec_internal.h and all three files includes it. Why not doing
> > > it there to make it the same as before?
> > 
> > The output message format is incorrect, if other *.c (except for kexec*)
> > include the kexec_internal.h.
> 
> No, I assume Dave added kexec_internal.h not to be used by other *.c.
> It should be only used by kexec*.c
> 
> > 
> > #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> > 
> > How about defining the format like above in each *.c?

Ok. I can split this patch into two patches: one for the cleanup, the other to
fix the prefix issue.

Thanks
Minfei
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC PATCH 2/2] powerpc: Add ppc64le_defconfig

2015-09-22 Thread Michael Ellerman
Based directly on ppc64_defconfig using merge_config.

Signed-off-by: Michael Ellerman 
---
 arch/powerpc/Makefile | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index b9b4af2af9a5..3704db45a832 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -288,6 +288,10 @@ PHONY += pseries_le_defconfig
 pseries_le_defconfig:
$(call merge_into_defconfig,pseries_defconfig,le)
 
+PHONY += ppc64le_defconfig
+ppc64le_defconfig:
+   $(call merge_into_defconfig,ppc64_defconfig,le)
+
 PHONY += mpc85xx_defconfig
 mpc85xx_defconfig:
$(call merge_into_defconfig,mpc85xx_basic_defconfig,\
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[RFC PATCH 1/2] scripts/kconfig/Makefile: Allow KBUILD_DEFCONFIG to be a target

2015-09-22 Thread Michael Ellerman
Arch Makefiles can set KBUILD_DEFCONFIG to tell kbuild the name of the
defconfig that should be built by default.

However currently there is an assumption that KBUILD_DEFCONFIG points to
a file at arch/$(SRCARCH)/configs/$(KBUILD_DEFCONFIG).

We would like to use a target, using merge_config, as our defconfig, so
adapt the logic in scripts/kconfig/Makefile to allow that.

To minimise the chance of breaking anything, we first check if
KBUILD_DEFCONFIG is a file, and if so we do the old logic. If it's not a
file, then we call the top-level Makefile with KBUILD_DEFCONFIG as the
target.

Signed-off-by: Michael Ellerman 
---
 scripts/kconfig/Makefile | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/scripts/kconfig/Makefile b/scripts/kconfig/Makefile
index aceaaed09811..b2b9c87cec50 100644
--- a/scripts/kconfig/Makefile
+++ b/scripts/kconfig/Makefile
@@ -96,9 +96,12 @@ savedefconfig: $(obj)/conf
 defconfig: $(obj)/conf
 ifeq ($(KBUILD_DEFCONFIG),)
$< $(silent) --defconfig $(Kconfig)
-else
+else ifneq ($(wildcard arch/$(SRCARCH)/configs/$(KBUILD_DEFCONFIG)),)
@$(kecho) "*** Default configuration is based on '$(KBUILD_DEFCONFIG)'"
$(Q)$< $(silent) 
--defconfig=arch/$(SRCARCH)/configs/$(KBUILD_DEFCONFIG) $(Kconfig)
+else
+   @$(kecho) "*** Default configuration is based on target 
'$(KBUILD_DEFCONFIG)'"
+   $(Q)$(MAKE) -f $(srctree)/Makefile $(KBUILD_DEFCONFIG)
 endif
 
 %_defconfig: $(obj)/conf
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 13/13] Staging: rtl8712: Coding style warning fix for block comment

2015-09-22 Thread Sudip Mukherjee
On Wed, Sep 23, 2015 at 02:03:56AM +0530, Punit Vara wrote:
> This patch is to rtl8712_gp_bitdef.h file that fixes up following
> warning reported by checkpatch :
> 
> -Block comments use a trailing */ on a separate line
> 
> Signed-off-by: Punit Vara 
> ---
I am getting confused. One of your previous patches showed PATCH 5/6, this
one shows 13/13, but I am not seeing the other patches of these two
series. Is it a problem with my mail filters, or are you not sending them?

regards
sudip
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH 04/17] powerpc: mpic: use IRQCHIP_SKIP_SET_WAKE instead of redundant mpic_irq_set_wake

2015-09-22 Thread Wang Dongsheng


> -Original Message-
> From: linux-kernel-ow...@vger.kernel.org [mailto:linux-kernel-
> ow...@vger.kernel.org] On Behalf Of Thomas Gleixner
> Sent: Wednesday, September 23, 2015 11:49 AM
> To: Wang Dongsheng-B40534
> Cc: Wood Scott-B07421; Sudeep Holla; linux...@vger.kernel.org; linux-
> ker...@vger.kernel.org; Rafael J. Wysocki; Benjamin Herrenschmidt; Paul
> Mackerras; Michael Ellerman; Jia Hongtao-B38951; Marc Zyngier; linuxppc-
> d...@lists.ozlabs.org
> Subject: RE: [PATCH 04/17] powerpc: mpic: use IRQCHIP_SKIP_SET_WAKE instead of
> redundant mpic_irq_set_wake
> 
> On Wed, 23 Sep 2015, Wang Dongsheng wrote:
> > > On Mon, 2015-09-21 at 16:47 +0100, Sudeep Holla wrote:
> > > > mpic_irq_set_wake return -ENXIO for non FSL MPIC and sets IRQF_NO_SUSPEND
> > > > flag for FSL ones. enable_irq_wake already returns -ENXIO if irq_set_wake
> > > > is not implemented. Also there's no need to set the IRQF_NO_SUSPEND flag
> > > > as it doesn't guarantee wakeup for that interrupt.
> > > >
> >
> > Non-freescale return -ENXIO, is there any issue? If non-freescale
> > platform does not support it, but IPs still use
> > enable/disable_irq_wake, we should return a error number.
> 
> You can just set IRQCHIP_SKIP_SET_WAKE for FSL chips and not for the
> others.
> 
> > @Scott:
> > If set this flag we cannot keep a irq as a wakeup source when system going 
> > to
> > SUSPEND or MEM.
> >
> > irq_set_wake() means we can set this irq as a wake source.
> > IRQCHIP_SKIP_SET_WAKE is ignore irq_set_wake() feature.
> 
> Nonsense. IRQCHIP_SKIP_SET_WAKE merely tells the core not to bail on
> !chip->irq_set_wake(), but it's still marking the interrupt as wakeup
> source and therefore not masking it on suspend.
> 

Sorry, I just checked the irq_set_irq_wake() code. You are right: with
IRQCHIP_SKIP_SET_WAKE the code can still go on to irqd_set to set the
IRQD_WAKEUP_STATE mask.

Yes, this flag just skips the irq_set_wake() callback, not the feature itself.

Regards,
-Dongsheng

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH v10 3/5] CPM/QE: use genalloc to manage CPM/QE muram

2015-09-22 Thread Zhao Qiang
On Wed, Sep 23, 2015 at 12:03 AM +0800, Wood Scott-B07421 wrote:

> -Original Message-
> From: Wood Scott-B07421
> Sent: Wednesday, September 23, 2015 12:03 PM
> To: Zhao Qiang-B45475
> Cc: linux-kernel@vger.kernel.org; linuxppc-...@lists.ozlabs.org;
> lau...@codeaurora.org; Xie Xiaobo-R63061; b...@kernel.crashing.org; Li
> Yang-Leo-R58472; pau...@samba.org
> Subject: Re: [PATCH v10 3/5] CPM/QE: use genalloc to manage CPM/QE muram
> 
> On Tue, 2015-09-22 at 21:20 -0500, Zhao Qiang-B45475 wrote:
> > On Wen, Sep 23, 2015 at 8:19 AM +0800, Wood Scott-B07421 wrote:
> >
> > > > > >  {
> > > > > > - int ret;
> > > > > > +
> > > > > > + unsigned long start;
> > > > > >   unsigned long flags;
> > > > > > + unsigned long size_alloc = size; struct muram_block *entry;
> > > > > > + int end_bit; int order = muram_pool->min_alloc_order;
> > > > > >
> > > > > >   spin_lock_irqsave(_muram_lock, flags);
> > > > > > - ret = rh_free(_muram_info, offset);
> > > > > > + end_bit = (offset >> order) + ((size + (1UL << order) - 1)
> > > > > > + >>
> > > > > order);
> > > > > > + if ((offset + size) > (end_bit << order))
> > > > > > + size_alloc = size + (1UL << order);
> > > > >
> > > > > Why do you need to do all these calculations here?
> > > >
> > > > So do it in gen_pool_fixed_alloc?
> > >
> > > Could you explain why they're needed at all?
> >
> > Why it does the calculations?
> > If the min block of gen_pool is 8 bytes, and I want to allocate a
> > Region with offset=7, size=8bytes, I actually need block 0 and block
> > 1, And the allocation will give me block 0.
> 
> How can you have offset 7 if the minimum order is 2 bytes?

The offset has no relationship with the minimum order; it is not determined by
the minimum order.
I want to allocate a specific region at offset=7, and the algorithm then
calculates the block bit from it.
I only use it as an example; it is not that I really need a region at offset=7.

So, now the minimum order is 2 bytes. If offset=7 and size=4 bytes are needed, it
actually allocates 6-12 to me,
so I need to check whether it is necessary to add one block (2 bytes) to the size
before the allocation.

-Zhao
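--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/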

Re: [PATCH] kexec: Add prefix "kexec" to output message

2015-09-22 Thread Baoquan He
On 09/23/15 at 01:14pm, Minfei Huang wrote:
> On 09/23/15 at 10:49am, Baoquan He wrote:
> > This is weird, user really don't need to know each file. I saw you added
> > a new file kexec_internal.h and all three files includes it. Why not doing
> > it there to make it the same as before?
> 
> The output message format is incorrect, if other *.c (except for kexec*)
> include the kexec_internal.h.

No, I assume Dave added kexec_internal.h not to be used by other *.c.
It should be only used by kexec*.c

> 
> #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
> 
> How about defining the format like above in each *.c?
> 
> Thanks
> Minfei
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFT v3] eata: Convert eata driver as normal PCI and platform device drivers

2015-09-22 Thread Jiang Liu
On 2015/9/23 7:36, Arthur Marsh wrote:
> James Bottomley wrote on 23/09/15 08:15:
>> On Wed, 2015-09-23 at 07:55 +0930, Arthur Marsh wrote:
>> It looks to be some problem in shut down.  Can you simply remove and
>> re-insert the driver successfully?  If it's your root disk driver,
>> you'll have to do this from an initrd so as not to have root mounted
>> from the eata controller.
>>
>> If the remove and reinsert fails, it means we have a problem in the
>> driver shut down.  If not, it's likely something kexec related.
>>
>> James
> 
> OK, it looks like there was a problem with unloading the driver.
> 
> After un-mounting file systems on the disk attached to the SCSI
> controller using the eata driver I could do a:
> 
> modprobe -r eata
> 
> but received the output of the attached dmesg log.
> 
> Attempting to do
> 
> modprobe eata
> 
> after the previous modprobe -r eata resulted in a complete lock-up.
Hi Arthur,
I have found the cause of the warning messages, it's caused
by a flaw in the conversion. But according to my understanding,
it isn't related to the kexec/kdump failure. Could you please help
to test the attached new version?
Thanks!
Gerry

> 
> Arthur.
>From 2231506adf7da0944fac82ec38040cc2f70562f7 Mon Sep 17 00:00:00 2001
From: Jiang Liu 
Date: Tue, 22 Sep 2015 10:16:20 +0800
Subject: [Bugfix v3] eata: Convert eata driver as normal PCI and platform
 device drivers
To: Thomas Gleixner ,
Bjorn Helgaas ,
Arthur Marsh ,
Hannes Reinecke ,
Ballabio, Dario ,
Christoph Hellwig 
Cc: linux-kernel@vger.kernel.org,
linux-...@vger.kernel.org,
linux-s...@vger.kernel.org,
x...@kernel.org

Previously the eata driver just grabs and accesses eata PCI devices
without implementing a PCI device driver, which causes trouble with the
latest IRQ related changes.

Commit 991de2e59090 ("PCI, x86: Implement pcibios_alloc_irq() and
pcibios_free_irq()") changes the way to allocate PCI legacy IRQ
for PCI devices on x86 platforms. Instead of allocating PCI legacy
IRQs when pcibios_enable_device() gets called, now pcibios_alloc_irq()
will be called by pci_device_probe() to allocate PCI legacy IRQs
when binding PCI drivers to PCI devices.

But the eata driver directly accesses PCI devices without implementing
corresponding PCI drivers, so pcibios_alloc_irq() won't be called for
those PCI devices and wrong IRQ number may be used to manage the PCI
device.

This patch implements a PCI device driver to manage eata PCI devices,
so eata driver could properly cooperate with the PCI core. It also
provides headroom for PCI hotplug with eata driver.

It also represents non-PCI eata devices as platform devices, so it could
be managed as normal devices.

Signed-off-by: Jiang Liu 
Cc: Hannes Reinecke 
Cc: Ballabio, Dario 
Cc: Christoph Hellwig 
---
 drivers/scsi/eata.c |  451 +++
 1 file changed, 272 insertions(+), 179 deletions(-)

diff --git a/drivers/scsi/eata.c b/drivers/scsi/eata.c
index 227dd2c2ec2f..11813a72c2e9 100644
--- a/drivers/scsi/eata.c
+++ b/drivers/scsi/eata.c
@@ -486,6 +486,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -503,8 +505,6 @@
 #include 
 #include 
 
-static int eata2x_detect(struct scsi_host_template *);
-static int eata2x_release(struct Scsi_Host *);
 static int eata2x_queuecommand(struct Scsi_Host *, struct scsi_cmnd *);
 static int eata2x_eh_abort(struct scsi_cmnd *);
 static int eata2x_eh_host_reset(struct scsi_cmnd *);
@@ -513,9 +513,9 @@ static int eata2x_bios_param(struct scsi_device *, struct block_device *,
 static int eata2x_slave_configure(struct scsi_device *);
 
 static struct scsi_host_template driver_template = {
+	.module = THIS_MODULE,
+	.proc_name = "eata2x",
 	.name = "EATA/DMA 2.0x rev. 8.10.00 ",
-	.detect = eata2x_detect,
-	.release = eata2x_release,
 	.queuecommand = eata2x_queuecommand,
 	.eh_abort_handler = eata2x_eh_abort,
 	.eh_host_reset_handler = eata2x_eh_host_reset,
@@ -834,12 +834,10 @@ struct hostdata {
 	struct mssp sp;		/* Local copy of sp buffer */
 };
 
-static struct Scsi_Host *sh[MAX_BOARDS];
 static const char *driver_name = "EATA";
-static char sha[MAX_BOARDS];
-
-/* Initialize num_boards so that ihdlr can work while detect is in progress */
-static unsigned int num_boards = MAX_BOARDS;
+static DEFINE_IDA(eata2x_ida);
+static struct platform_device *eata2x_platform_devs[MAX_BOARDS];
+static bool eata2x_platform_driver_registered;
 
 static unsigned long io_port[] = {
 
@@ -850,10 +848,6 @@ static unsigned long io_port[] = {
 	/* First ISA */
 	0x1f0,
 
-	/* Space for MAX_PCI ports possibly reported by PCI_BIOS */
-	SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP,
-	SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP, SKIP,
-
 	/* MAX_EISA ports */
 	0x1c88, 0x2c88, 0x3c88, 0x4c88, 0x5c88, 0x6c88, 0x7c88, 0x8c88,
 	0x9c88, 0xac88, 0xbc88, 0xcc88, 0xdc88, 0xec88, 0xfc88,
@@ -1024,74 +1018,42 @@ static int read_pio(unsigned long iobase, ushort * start, ushort * 

Re: [PATCH] kexec: Add prefix "kexec" to output message

2015-09-22 Thread Minfei Huang
On 09/23/15 at 10:49am, Baoquan He wrote:
> On 09/23/15 at 09:37am, Dave Young wrote:
> > > > Hi, Dave.
> > > > 
> > > > How about removing all of the prefix "crashkernel" in kexec_core. Thus
> > > > we can be consistent with the output message prefix "kexec".
> > > 
> > > Ping, any comment is appreciate and helpful.
> > 
> > Remove "crashkernel" sounds not a proper way, it indicates crashkernel 
> > parsing
> > messages. I have no idea what is the best way but below modification sounds 
> > better to me:
> > 
> > kexec_core.c:
> > 
> > #define pr_fmt(fmt)"[kexec_core] " fmt
> > Also remove below prefix "Kexec:"
> > pr_warn("Kexec: Memory allocation for saving cpu register states 
> > failed\n"); 
> > 
> > kexec.c:
> > #define pr_fmt(fmt)"[kexec] " fmt
> > 
> > kexec_file.c:
> > #define pr_fmt(fmt)"[kexec_file] " fmt
> 
> This is weird, user really don't need to know each file. I saw you added
> a new file kexec_internal.h and all three files includes it. Why not doing
> it there to make it the same as before?

The output message format is incorrect, if other *.c (except for kexec*)
include the kexec_internal.h.

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

How about defining the format like above in each *.c?

Thanks
Minfei
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2] net/wireless: enable wiphy device to suspend/resume asynchronously

2015-09-22 Thread Fu, Zhonghui

Hi Johannes,

Could you please tell me which kernel version will merge this patch?


Thanks,
Zhonghui

 
On 2015/9/22 22:05, Johannes Berg wrote:
> On Sat, 2015-09-19 at 10:40 +0800, Fu, Zhonghui wrote:
>> Now, PM core supports asynchronous suspend/resume mode for devices
>> during system suspend/resume, and the power state transition of one
>> device may be completed in separate kernel thread. PM core ensures
>> all power state transition timing dependency between devices. This
>> patch enables wiphy device to suspend/resume asynchronously. This can
>> take advantage of multicore and improve system suspend/resume speed.
>>
> Applied.
>
> johannes
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2] perf record: Synthesize COMM event for a command line workload

2015-09-22 Thread Namhyung Kim
On Wed, Sep 23, 2015 at 12:09:20AM -0300, Arnaldo Carvalho de Melo wrote:
> Le 23 sept. 2015 12:04 AM, "Namhyung Kim"  a écrit :
> >
> > Hi Arnaldo,
> >
> > On Tue, Sep 22, 2015 at 04:48:25PM -0300, Arnaldo Carvalho de Melo wrote:
> > > Humm, I think it is working by accident, as you're not allocating space
> > > for machine->id_hdr_size, please take a look at
> > > perf_event__synthesize_thread_map().
> > >
> > > Right now its not a problem this line in perf_event__prepare_comm():
> > >
> > > memset(event->comm.comm + size, 0, machine->id_hdr_size);
> > >
> > > Because perf_event is an union and some of its elements, like mmap/mmap2
> > > have that PATH_MAX part, but its just a matter of the id_hdr_size
> > > becoming bigger than that and we'll have a problem...
> >
> > Right. I'll send a fix to include the id_hdr part.
> 
> Thanks!


>From 10123021c4e55f14d3bb8e6cc576694bb76d6699 Mon Sep 17 00:00:00 2001
From: Namhyung Kim 
Date: Wed, 23 Sep 2015 11:51:40 +0900
Subject: [PATCH] perf record: Allocate area for sample_id_hdr in a synthesized
 comm event

A previous patch added a synthesized comm event for forked child
process but it missed that the event should contain area for
sample_id_hdr at the end.  It worked by accident since the perf_event
union contains bigger event structs like mmap_events.

This patch fixes it by dynamically allocating the event struct including
that area, as is done in perf_event__synthesize_thread_map().

Reported-by: Arnaldo Carvalho de Melo 
Signed-off-by: Namhyung Kim 
---
 tools/perf/builtin-record.c | 13 ++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 6d8302d4612f..5e01c070dbf2 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -637,7 +637,13 @@ static int __cmd_record(struct record *rec, int argc, 
const char **argv)
 * Let the child rip
 */
if (forks) {
-   union perf_event event;
+   union perf_event *event;
+
+   event = malloc(sizeof(event->comm) + machine->id_hdr_size);
+   if (event == NULL) {
+   err = -ENOMEM;
+   goto out_child;
+   }
 
/*
 * Some H/W events are generated before COMM event
@@ -645,10 +651,11 @@ static int __cmd_record(struct record *rec, int argc, 
const char **argv)
 * cannot see a correct process name for those events.
 * Synthesize COMM event to prevent it.
 */
-   perf_event__synthesize_comm(tool, ,
+   perf_event__synthesize_comm(tool, event,
rec->evlist->workload.pid,
process_synthesized_event,
-   >machines.host);
+   machine);
+   free(event);
 
perf_evlist__start_workload(rec->evlist);
}
-- 
2.5.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] dcache: Deduplicate code that sets up dentry_hashtable

2015-09-22 Thread Eric Biggers
Make both dcache_init_early() and dcache_init() call a new helper
function, dcache_alloc_hashtable().  Also address a small inefficiency by
moving the table length calculation outside of the loop condition.  gcc
apparently doesn't do that because it assumes that the memory pointed to
by 'dentry_hashtable' might alias 'd_hash_shift'.

Signed-off-by: Eric Biggers 
---
 fs/dcache.c | 48 +++-
 1 file changed, 19 insertions(+), 29 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index 5c33aeb..7cfe848 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -3380,35 +3380,39 @@ static int __init set_dhash_entries(char *str)
 }
 __setup("dhash_entries=", set_dhash_entries);
 
-static void __init dcache_init_early(void)
+static void __init dcache_alloc_hashtable(int flags)
 {
-   unsigned int loop;
-
-   /* If hashes are distributed across NUMA nodes, defer
-* hash allocation until vmalloc space is available.
-*/
-   if (hashdist)
-   return;
+   unsigned int num_entries;
+   unsigned int i;
 
dentry_hashtable =
alloc_large_system_hash("Dentry cache",
sizeof(struct hlist_bl_head),
dhash_entries,
13,
-   HASH_EARLY,
+   flags,
_hash_shift,
_hash_mask,
0,
0);
 
-   for (loop = 0; loop < (1U << d_hash_shift); loop++)
-   INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
+   num_entries = 1U << d_hash_shift;
+
+   for (i = 0; i < num_entries; i++)
+   INIT_HLIST_BL_HEAD(_hashtable[i]);
 }
 
-static void __init dcache_init(void)
+static void __init dcache_init_early(void)
 {
-   unsigned int loop;
+   /* If hashes are distributed across NUMA nodes, defer
+* hash allocation until vmalloc space is available.
+*/
+   if (!hashdist)
+   dcache_alloc_hashtable(HASH_EARLY);
+}
 
+static void __init dcache_init(void)
+{
/* 
 * A constructor could be added for stable state like the lists,
 * but it is probably not worth it because of the cache nature
@@ -3418,22 +3422,8 @@ static void __init dcache_init(void)
SLAB_RECLAIM_ACCOUNT|SLAB_PANIC|SLAB_MEM_SPREAD);
 
/* Hash may have been set up in dcache_init_early */
-   if (!hashdist)
-   return;
-
-   dentry_hashtable =
-   alloc_large_system_hash("Dentry cache",
-   sizeof(struct hlist_bl_head),
-   dhash_entries,
-   13,
-   0,
-   _hash_shift,
-   _hash_mask,
-   0,
-   0);
-
-   for (loop = 0; loop < (1U << d_hash_shift); loop++)
-   INIT_HLIST_BL_HEAD(dentry_hashtable + loop);
+   if (hashdist)
+   dcache_alloc_hashtable(0);
 }
 
 /* SLAB cache for __getname() consumers */
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 02/15] hugetlb: fix compile error on tile

2015-09-22 Thread Dan Williams
Include asm/pgtable.h to get the definition for pud_t to fix:

include/linux/hugetlb.h:203:29: error: unknown type name 'pud_t'

Signed-off-by: Dan Williams 
---
 include/linux/hugetlb.h |1 +
 1 file changed, 1 insertion(+)

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 5e35379f58a5..ad5539cf52bf 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -8,6 +8,7 @@
 #include 
 #include 
 #include 
+#include 
 
 struct ctl_table;
 struct user_struct;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 09/15] block, pmem: fix null pointer de-reference on shutdown, check for queue death

2015-09-22 Thread Dan Williams
After the driver has been unbound the queue is dead and the private data
pointer is invalid.  Check that the queue is still alive, or otherwise
pin it active before using queuedata.

Fixes crash signatures like the following.

 BUG: unable to handle kernel paging request at 88014000
 [..]
 Call Trace:
  [] ? copy_user_handle_tail+0x5f/0x70
  [] pmem_do_bvec.isra.11+0x70/0xf0 [nd_pmem]
  [] pmem_make_request+0xd1/0x200 [nd_pmem]
  [] ? mempool_alloc+0x72/0x1a0
  [] generic_make_request+0xd6/0x110
  [] submit_bio+0x76/0x170
  [] submit_bh_wbc+0x12f/0x160
  [] submit_bh+0x12/0x20
  [] jbd2_write_superblock+0x8d/0x170
  [] jbd2_mark_journal_empty+0x5d/0x90
  [] jbd2_journal_destroy+0x24b/0x270
  [] ? put_pwq_unlocked+0x2a/0x30
  [] ? destroy_workqueue+0x225/0x250
  [] ext4_put_super+0x64/0x360
  [] generic_shutdown_super+0x6a/0xf0

Cc: Jens Axboe 
Cc: Christoph Hellwig 
Cc: Dave Chinner 
Cc: Ross Zwisler 
Signed-off-by: Dan Williams 
---
 block/blk-core.c  |2 ++
 drivers/nvdimm/pmem.c |8 
 2 files changed, 10 insertions(+)

diff --git a/block/blk-core.c b/block/blk-core.c
index 13764f8b22e0..0ea7d285b886 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -532,11 +532,13 @@ int blk_dax_get(struct request_queue *q)
 {
return blk_qref_enter(>dax_ref, GFP_NOWAIT);
 }
+EXPORT_SYMBOL(blk_dax_get);
 
 void blk_dax_put(struct request_queue *q)
 {
percpu_ref_put(>dax_ref.count);
 }
+EXPORT_SYMBOL(blk_dax_put);
 
 static void blk_dax_freeze(struct request_queue *q)
 {
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index a01611d8f351..3ee02af73ad0 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -73,6 +73,12 @@ static void pmem_make_request(struct request_queue *q, 
struct bio *bio)
struct block_device *bdev = bio->bi_bdev;
struct pmem_device *pmem = bdev->bd_disk->private_data;
 
+   if (blk_dax_get(q) != 0) {
+   bio->bi_error = -ENODEV;
+   bio_endio(bio);
+   return;
+   }
+
do_acct = nd_iostat_start(bio, );
bio_for_each_segment(bvec, bio, iter)
pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, bvec.bv_offset,
@@ -84,6 +90,8 @@ static void pmem_make_request(struct request_queue *q, struct 
bio *bio)
wmb_pmem();
 
bio_endio(bio);
+
+   blk_dax_put(q);
 }
 
 static int pmem_rw_page(struct block_device *bdev, sector_t sector,

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 10/15] block, dax: fix lifetime of in-kernel dax mappings

2015-09-22 Thread Dan Williams
The DAX implementation needs to protect new calls to ->direct_access()
and usage of its return value against unbind of the underlying block
device.  Use blk_dax_{get|put}() to either prevent blk_cleanup_queue()
from proceeding, or fail the dax_map_bh() if the request_queue is being
torn down.

Cc: Jens Axboe 
Cc: Christoph Hellwig 
Cc: Boaz Harrosh 
Cc: Ross Zwisler 
Signed-off-by: Dan Williams 
---
 fs/dax.c |  131 --
 1 file changed, 84 insertions(+), 47 deletions(-)

diff --git a/fs/dax.c b/fs/dax.c
index bcfb14bfc1e4..358eea39e982 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -63,12 +63,43 @@ int dax_clear_blocks(struct inode *inode, sector_t block, 
long size)
 }
 EXPORT_SYMBOL_GPL(dax_clear_blocks);
 
-static long dax_get_addr(struct buffer_head *bh, void __pmem **addr,
-   unsigned blkbits)
+static void __pmem *__dax_map_bh(const struct buffer_head *bh, unsigned 
blkbits,
+   unsigned long *pfn, long *len)
 {
-   unsigned long pfn;
+   long rc;
+   void __pmem *addr;
+   struct block_device *bdev = bh->b_bdev;
+   struct request_queue *q = bdev->bd_queue;
sector_t sector = bh->b_blocknr << (blkbits - 9);
-   return bdev_direct_access(bh->b_bdev, sector, addr, , bh->b_size);
+
+   rc = blk_dax_get(q);
+   if (rc < 0)
+   return (void __pmem *) ERR_PTR(rc);
+   rc = bdev_direct_access(bdev, sector, , pfn, bh->b_size);
+   if (len)
+   *len = rc;
+   if (rc < 0) {
+   blk_dax_put(q);
+   return (void __pmem *) ERR_PTR(rc);
+   }
+   return addr;
+}
+
+static void __pmem *dax_map_bh(const struct buffer_head *bh, unsigned blkbits)
+{
+   unsigned long pfn;
+
+   return __dax_map_bh(bh, blkbits, , NULL);
+}
+
+static void dax_unmap_bh(const struct buffer_head *bh, void __pmem *addr)
+{
+   struct block_device *bdev = bh->b_bdev;
+   struct request_queue *q = bdev->bd_queue;
+
+   if (IS_ERR(addr))
+   return;
+   blk_dax_put(q);
 }
 
 /* the clear_pmem() calls are ordered by a wmb_pmem() in the caller */
@@ -104,15 +135,16 @@ static ssize_t dax_io(struct inode *inode, struct 
iov_iter *iter,
  loff_t start, loff_t end, get_block_t get_block,
  struct buffer_head *bh)
 {
-   ssize_t retval = 0;
-   loff_t pos = start;
-   loff_t max = start;
-   loff_t bh_max = start;
-   void __pmem *addr;
+   loff_t pos = start, max = start, bh_max = start;
+   int rw = iov_iter_rw(iter), rc;
+   long map_len = 0;
+   unsigned long pfn;
+   void __pmem *addr = NULL;
+   void __pmem *kmap = (void __pmem *) ERR_PTR(-EIO);
bool hole = false;
bool need_wmb = false;
 
-   if (iov_iter_rw(iter) != WRITE)
+   if (rw == READ)
end = min(end, i_size_read(inode));
 
while (pos < end) {
@@ -127,9 +159,8 @@ static ssize_t dax_io(struct inode *inode, struct iov_iter 
*iter,
if (pos == bh_max) {
bh->b_size = PAGE_ALIGN(end - pos);
bh->b_state = 0;
-   retval = get_block(inode, block, bh,
-  iov_iter_rw(iter) == WRITE);
-   if (retval)
+   rc = get_block(inode, block, bh, rw == WRITE);
+   if (rc)
break;
if (!buffer_size_valid(bh))
bh->b_size = 1 << blkbits;
@@ -141,21 +172,25 @@ static ssize_t dax_io(struct inode *inode, struct 
iov_iter *iter,
bh->b_size -= done;
}
 
-   hole = iov_iter_rw(iter) != WRITE && 
!buffer_written(bh);
+   hole = rw == READ && !buffer_written(bh);
if (hole) {
addr = NULL;
size = bh->b_size - first;
} else {
-   retval = dax_get_addr(bh, , blkbits);
-   if (retval < 0)
+   dax_unmap_bh(bh, kmap);
+   kmap = __dax_map_bh(bh, blkbits, , 
_len);
+   if (IS_ERR(kmap)) {
+   rc = PTR_ERR(kmap);
break;
+   }
+   addr = kmap;
if (buffer_unwritten(bh) || buffer_new(bh)) {
-   dax_new_buf(addr, retval, first, pos,
-   end);
+   dax_new_buf(addr, map_len, first, pos,
+   

Re: [PATCH] pinctrl: sort dt_params array in the same order as pin_config_param

2015-09-22 Thread Bjorn Andersson
On Sun 20 Sep 09:26 PDT 2015, Masahiro Yamada wrote:

> Sort this array in the same order as enum pin_config_param and
> conf_items array for consistency.
> 
> Signed-off-by: Masahiro Yamada 
> ---
> 
>  drivers/pinctrl/pinconf-generic.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/drivers/pinctrl/pinconf-generic.c 
> b/drivers/pinctrl/pinconf-generic.c
> index e63ad9f..f0bc8bb 100644
> --- a/drivers/pinctrl/pinconf-generic.c
> +++ b/drivers/pinctrl/pinconf-generic.c
> @@ -166,11 +166,11 @@ static const struct pinconf_generic_params dt_params[] 
> = {
>   { "input-schmitt-disable", PIN_CONFIG_INPUT_SCHMITT_ENABLE, 0 },
>   { "input-debounce", PIN_CONFIG_INPUT_DEBOUNCE, 0 },
>   { "power-source", PIN_CONFIG_POWER_SOURCE, 0 },
> + { "slew-rate", PIN_CONFIG_SLEW_RATE, 0 },
>   { "low-power-enable", PIN_CONFIG_LOW_POWER_MODE, 1 },
>   { "low-power-disable", PIN_CONFIG_LOW_POWER_MODE, 0 },
>   { "output-low", PIN_CONFIG_OUTPUT, 0, },
>   { "output-high", PIN_CONFIG_OUTPUT, 1, },
> - { "slew-rate", PIN_CONFIG_SLEW_RATE, 0},

I would like to see these alphabetically sorted instead and looking at
most of the entries that's already the case.

The order of the lists in this file doesn't matter, so that's
easy. There are some potential implications of renumbering the enum, but I
think it's worth keeping them sorted as well.

Regards,
Bjorn
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 06/15] devm_memunmap: use devres_release()

2015-09-22 Thread Dan Williams
Remove open coded call to memunmap.

Cc: Christoph Hellwig 
Cc: Ross Zwisler 
Signed-off-by: Dan Williams 
---
 kernel/memremap.c |5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/kernel/memremap.c b/kernel/memremap.c
index 72b0c66628b6..0756273437e0 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -131,9 +131,8 @@ EXPORT_SYMBOL(devm_memremap);
 
 void devm_memunmap(struct device *dev, void *addr)
 {
-   WARN_ON(devres_destroy(dev, devm_memremap_release, devm_memremap_match,
-  addr));
-   memunmap(addr);
+   WARN_ON(devres_release(dev, devm_memremap_release,
+   devm_memremap_match, addr));
 }
 EXPORT_SYMBOL(devm_memunmap);
 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 14/15] mm, dax, pmem: introduce {get|put}_dev_pagemap() for dax-gup

2015-09-22 Thread Dan Williams
get_dev_pagemap() enables paths like get_user_pages() to pin a dynamically
mapped pfn-range (devm_memremap_pages()) while the resulting struct page
objects are in use.  Unlike get_page() it may fail if the device is, or
is in the process of being, disabled.  While the initial lookup of the
range may be an expensive list walk, the result is cached to speed up
subsequent lookups which are likely to be in the same mapped range.

Cc: Dave Hansen 
Cc: Andrew Morton 
Cc: Matthew Wilcox 
Cc: Ross Zwisler 
Cc: Alexander Viro 
Signed-off-by: Dan Williams 
---
 drivers/nvdimm/pmem.c|2 +
 include/linux/io.h   |   17 ---
 include/linux/mm.h   |   62 
 include/linux/mm_types.h |6 +++-
 kernel/memremap.c|   71 ++
 5 files changed, 140 insertions(+), 18 deletions(-)

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 1c670775129b..ac581a2e20e2 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -184,6 +184,7 @@ static void pmem_detach_disk(struct pmem_device *pmem)
 static int pmem_attach_disk(struct device *dev,
struct nd_namespace_common *ndns, struct pmem_device *pmem)
 {
+   struct nd_namespace_io *nsio = to_nd_namespace_io(>dev);
int nid = dev_to_node(dev);
struct gendisk *disk;
 
@@ -191,6 +192,7 @@ static int pmem_attach_disk(struct device *dev,
if (!pmem->pmem_queue)
return -ENOMEM;
 
+   devm_register_pagemap(dev, >res, 
>pmem_queue->dax_ref.count);
blk_queue_make_request(pmem->pmem_queue, pmem_make_request);
blk_queue_physical_block_size(pmem->pmem_queue, PAGE_SIZE);
blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX);
diff --git a/include/linux/io.h b/include/linux/io.h
index de64c1e53612..2f2f8859abd9 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -87,23 +87,6 @@ void *devm_memremap(struct device *dev, resource_size_t 
offset,
size_t size, unsigned long flags);
 void devm_memunmap(struct device *dev, void *addr);
 
-void *__devm_memremap_pages(struct device *dev, struct resource *res);
-
-#ifdef CONFIG_ZONE_DEVICE
-void *devm_memremap_pages(struct device *dev, struct resource *res);
-#else
-static inline void *devm_memremap_pages(struct device *dev, struct resource 
*res)
-{
-   /*
-* Fail attempts to call devm_memremap_pages() without
-* ZONE_DEVICE support enabled, this requires callers to fall
-* back to plain devm_memremap() based on config
-*/
-   WARN_ON_ONCE(1);
-   return ERR_PTR(-ENXIO);
-}
-#endif
-
 /*
  * Some systems do not have legacy ISA devices.
  * /dev/port is not a valid interface on these systems.
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 989c5459bee7..6183549a854c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -15,12 +15,14 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
 
 struct mempolicy;
 struct anon_vma;
@@ -558,6 +560,28 @@ static inline void init_page_count(struct page *page)
 void put_page(struct page *page);
 void put_pages_list(struct list_head *pages);
 
+#ifdef CONFIG_ZONE_DEVICE
+void *devm_memremap_pages(struct device *dev, struct resource *res);
+void devm_register_pagemap(struct device *dev, struct resource *res,
+   struct percpu_ref *ref);
+#else
+static inline void *devm_memremap_pages(struct device *dev, struct resource 
*res)
+{
+   /*
+* Fail attempts to call devm_memremap_pages() without
+* ZONE_DEVICE support enabled, this requires callers to fall
+* back to plain devm_memremap() based on config
+*/
+   WARN_ON_ONCE(1);
+   return ERR_PTR(-ENXIO);
+}
+
+static inline void devm_register_pagemap(struct device *dev, struct resource 
*res,
+   struct percpu_ref *ref)
+{
+}
+#endif
+
 void split_page(struct page *page, unsigned int order);
 int split_free_page(struct page *page);
 
@@ -717,6 +741,44 @@ static inline enum zone_type page_zonenum(const struct 
page *page)
return (page->flags >> ZONES_PGSHIFT) & ZONES_MASK;
 }
 
+/**
+ * struct dev_pagemap - reference count for a devm_memremap_pages mapping
+ * @res: physical address range covered by @ref
+ * @ref: reference count that pins the devm_memremap_pages() mapping
+ * @dev: host device of the mapping for debug
+ */
+struct dev_pagemap {
+   const struct resource *res;
+   struct percpu_ref *ref;
+   struct device *dev;
+};
+
+struct dev_pagemap *__get_dev_pagemap(resource_size_t phys);
+
+static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
+   struct dev_pagemap *pgmap)
+{
+   resource_size_t phys = PFN_PHYS(pfn);
+
+   /*
+* In the cached case we're already holding a reference so we can
+* simply do a blind increment
+*/
+   if (pgmap && phys >= 

[PATCH 11/15] mm, dax, pmem: introduce __pfn_t

2015-09-22 Thread Dan Williams
In preparation for enabling get_user_pages() operations on dax mappings,
introduce a type that encapsulates a page-frame-number that can also be
used to encode other information.  This other information is the
historical "page_link" encoding in a scatterlist, but can also denote
"device memory".  Where "device memory" is a set of pfns that are not
part of the kernel's linear mapping by default, but are accessed via the
same memory controller as ram.  The motivation for this new type is
large capacity persistent memory that optionally has struct page entries
in the 'memmap'.

When a driver, like pmem, has established a devm_memremap_pages()
mapping it needs to communicate to upper layers that the pfn has a page
backing.  This property will be leveraged in a later patch to enable
dax-gup.  For now, update all the ->direct_access() implementations to
communicate whether the returned pfn range is mapped.

Cc: Christoph Hellwig 
Cc: Dave Hansen 
Cc: Andrew Morton 
Cc: Ross Zwisler 
Signed-off-by: Dan Williams 
---
 arch/powerpc/sysdev/axonram.c |8 ++---
 drivers/block/brd.c   |4 +-
 drivers/nvdimm/pmem.c |   27 ---
 drivers/s390/block/dcssblk.c  |   10 ++
 fs/block_dev.c|2 +
 fs/dax.c  |   23 +++--
 include/linux/blkdev.h|4 +-
 include/linux/mm.h|   72 +
 8 files changed, 110 insertions(+), 40 deletions(-)

diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index 24ffab2572e8..35eff52c0a38 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -141,15 +141,13 @@ axon_ram_make_request(struct request_queue *queue, struct 
bio *bio)
  */
 static long
 axon_ram_direct_access(struct block_device *device, sector_t sector,
-  void __pmem **kaddr, unsigned long *pfn)
+  void __pmem **kaddr, __pfn_t *pfn)
 {
struct axon_ram_bank *bank = device->bd_disk->private_data;
loff_t offset = (loff_t)sector << AXON_RAM_SECTOR_SHIFT;
-   void *addr = (void *)(bank->ph_addr + offset);
-
-   *kaddr = (void __pmem *)addr;
-   *pfn = virt_to_phys(addr) >> PAGE_SHIFT;
 
+   *kaddr = (void __pmem __force *) bank->io_addr + offset;
+   *pfn = phys_to_pfn_t(bank->ph_addr + offset, PFN_DEV);
return bank->size - offset;
 }
 
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index f645a71ae827..50e78b1ea26c 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -374,7 +374,7 @@ static int brd_rw_page(struct block_device *bdev, sector_t 
sector,
 
 #ifdef CONFIG_BLK_DEV_RAM_DAX
 static long brd_direct_access(struct block_device *bdev, sector_t sector,
-   void __pmem **kaddr, unsigned long *pfn)
+   void __pmem **kaddr, __pfn_t *pfn)
 {
struct brd_device *brd = bdev->bd_disk->private_data;
struct page *page;
@@ -385,7 +385,7 @@ static long brd_direct_access(struct block_device *bdev, 
sector_t sector,
if (!page)
return -ENOSPC;
*kaddr = (void __pmem *)page_address(page);
-   *pfn = page_to_pfn(page);
+   *pfn = page_to_pfn_t(page);
 
return PAGE_SIZE;
 }
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 3ee02af73ad0..1c670775129b 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -39,6 +39,7 @@ struct pmem_device {
phys_addr_t phys_addr;
/* when non-zero this device is hosting a 'pfn' instance */
phys_addr_t data_offset;
+   unsigned long   pfn_flags;
void __pmem *virt_addr;
size_t  size;
 };
@@ -108,25 +109,22 @@ static int pmem_rw_page(struct block_device *bdev, 
sector_t sector,
 }
 
 static long pmem_direct_access(struct block_device *bdev, sector_t sector,
- void __pmem **kaddr, unsigned long *pfn)
+ void __pmem **kaddr, __pfn_t *pfn)
 {
struct pmem_device *pmem = bdev->bd_disk->private_data;
resource_size_t offset = sector * 512 + pmem->data_offset;
-   resource_size_t size;
+   resource_size_t size = pmem->size - offset;
 
-   if (pmem->data_offset) {
+   *kaddr = pmem->virt_addr + offset;
+   *pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);
+
+   if (__pfn_t_has_page(*pfn)) {
/*
 * Limit the direct_access() size to what is covered by
 * the memmap
 */
-   size = (pmem->size - offset) & ~ND_PFN_MASK;
-   } else
-   size = pmem->size - offset;
-
-   /* FIXME convert DAX to comprehend that this mapping has a lifetime */
-   *kaddr = pmem->virt_addr + offset;
-   *pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT;
-
+   size &= ~ND_PFN_MASK;
+   }
return size;
 }
 
@@ -158,9 +156,11 @@ static 

[PATCH 12/15] mm, dax, gpu: convert vm_insert_mixed to __pfn_t, introduce _PAGE_DEVMAP

2015-09-22 Thread Dan Williams
Convert the raw unsigned long 'pfn' argument to __pfn_t for the purpose of
evaluating the PFN_MAP and PFN_DEV flags.  When both are set, it
triggers _PAGE_DEVMAP to be set in the resulting pte.  This flag will
later be used in the get_user_pages() path to pin the page mapping,
dynamically allocated by devm_memremap_pages(), until all the resulting
pages are released.

There are no functional changes to the gpu drivers as a result of this
conversion.

This uncovered several architectures with no local definition for
pfn_pte(), in response __pfn_t_pte() is only defined when an arch
opts-in by "#define pfn_pte pfn_pte".

Cc: Dave Hansen 
Cc: Andrew Morton 
Cc: David Airlie 
Signed-off-by: Dan Williams 
---
 arch/alpha/include/asm/pgtable.h|1 +
 arch/m68k/include/asm/page_no.h |1 +
 arch/parisc/include/asm/pgtable.h   |1 +
 arch/powerpc/include/asm/pgtable.h  |1 +
 arch/tile/include/asm/pgtable.h |1 +
 arch/um/include/asm/pgtable-3level.h|1 +
 arch/x86/include/asm/pgtable.h  |   18 ++
 arch/x86/include/asm/pgtable_types.h|7 ++-
 drivers/gpu/drm/exynos/exynos_drm_gem.c |3 ++-
 drivers/gpu/drm/gma500/framebuffer.c|2 +-
 drivers/gpu/drm/msm/msm_gem.c   |3 ++-
 drivers/gpu/drm/omapdrm/omap_gem.c  |6 --
 drivers/gpu/drm/ttm/ttm_bo_vm.c |3 ++-
 fs/dax.c|2 +-
 include/linux/mm.h  |   29 -
 mm/memory.c |   15 +--
 16 files changed, 79 insertions(+), 15 deletions(-)

diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index a9a119592372..a54050fe867e 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -216,6 +216,7 @@ extern unsigned long __zero_page(void);
 })
 #endif
 
+#define pfn_pte pfn_pte
 extern inline pte_t pfn_pte(unsigned long physpfn, pgprot_t pgprot)
 { pte_t pte; pte_val(pte) = (PHYS_TWIDDLE(physpfn) << 32) | 
pgprot_val(pgprot); return pte; }
 
diff --git a/arch/m68k/include/asm/page_no.h b/arch/m68k/include/asm/page_no.h
index ef209169579a..930a42f6db44 100644
--- a/arch/m68k/include/asm/page_no.h
+++ b/arch/m68k/include/asm/page_no.h
@@ -34,6 +34,7 @@ extern unsigned long memory_end;
 
 #definevirt_addr_valid(kaddr)  (((void *)(kaddr) >= (void 
*)PAGE_OFFSET) && \
((void *)(kaddr) < (void *)memory_end))
+#define __pfn_to_phys(pfn) PFN_PHYS(pfn)
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/parisc/include/asm/pgtable.h 
b/arch/parisc/include/asm/pgtable.h
index f93c4a4e6580..dde7dd7200bd 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -377,6 +377,7 @@ static inline pte_t pte_mkspecial(pte_t pte){ 
return pte; }
 
 #define mk_pte(page, pgprot)   pfn_pte(page_to_pfn(page), (pgprot))
 
+#define pfn_pte pfn_pte
 static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot)
 {
pte_t pte;
diff --git a/arch/powerpc/include/asm/pgtable.h 
b/arch/powerpc/include/asm/pgtable.h
index 0717693c8428..8448ff1542e0 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -67,6 +67,7 @@ static inline int pte_present(pte_t pte)
  * Even if PTEs can be unsigned long long, a PFN is always an unsigned
  * long for now.
  */
+#define pfn_pte pfn_pte
 static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) {
return __pte(((pte_basic_t)(pfn) << PTE_RPN_SHIFT) |
 pgprot_val(pgprot)); }
diff --git a/arch/tile/include/asm/pgtable.h b/arch/tile/include/asm/pgtable.h
index 2b05ccbebed9..37c9aa3a3f0c 100644
--- a/arch/tile/include/asm/pgtable.h
+++ b/arch/tile/include/asm/pgtable.h
@@ -275,6 +275,7 @@ static inline unsigned long pte_pfn(pte_t pte)
 extern pgprot_t set_remote_cache_cpu(pgprot_t prot, int cpu);
 extern int get_remote_cache_cpu(pgprot_t prot);
 
+#define pfn_pte pfn_pte
 static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
 {
return hv_pte_set_pa(prot, PFN_PHYS(pfn));
diff --git a/arch/um/include/asm/pgtable-3level.h 
b/arch/um/include/asm/pgtable-3level.h
index 2b4274e7c095..4de681d15911 100644
--- a/arch/um/include/asm/pgtable-3level.h
+++ b/arch/um/include/asm/pgtable-3level.h
@@ -98,6 +98,7 @@ static inline unsigned long pte_pfn(pte_t pte)
return phys_to_pfn(pte_val(pte));
 }
 
+#define pfn_pte pfn_pte
 static inline pte_t pfn_pte(pfn_t page_nr, pgprot_t pgprot)
 {
pte_t pte;
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 867da5bbb4a3..02a54e5b7930 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -248,6 +248,11 @@ static inline pte_t pte_mkspecial(pte_t pte)
return pte_set_flags(pte, _PAGE_SPECIAL);
 }
 
+static inline pte_t pte_mkdevmap(pte_t pte)
+{
+   return pte_set_flags(pte, 

[PATCH 15/15] mm, x86: get_user_pages() for dax mappings

2015-09-22 Thread Dan Williams
A dax mapping establishes a pte with _PAGE_DEVMAP set when the driver
has established a devm_memremap_pages() mapping, i.e. when the __pfn_t
return from ->direct_access() has PFN_DEV and PFN_MAP set.  Later, when
encountering _PAGE_DEVMAP during a page table walk we lookup and pin a
struct dev_pagemap instance to keep the result of pfn_to_page() valid
until put_page().

Cc: Dave Hansen 
Cc: Andrew Morton 
Cc: Christoph Hellwig 
Cc: Ross Zwisler 
Cc: Thomas Gleixner 
Cc: Ingo Molnar 
Cc: H. Peter Anvin 
Cc: Jeff Moyer 
Cc: Peter Zijlstra 
Cc: Matthew Wilcox 
Cc: Alexander Viro 
Cc: Dave Chinner 
Signed-off-by: Dan Williams 
---
 arch/ia64/include/asm/pgtable.h |1 +
 arch/x86/include/asm/pgtable.h  |2 +
 arch/x86/mm/gup.c   |   56 +--
 include/linux/mm.h  |   42 -
 mm/gup.c|   11 +++-
 mm/hugetlb.c|   18 -
 mm/swap.c   |   15 ++
 7 files changed, 126 insertions(+), 19 deletions(-)

diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index 9f3ed9ee8f13..81d2af23958f 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -273,6 +273,7 @@ extern unsigned long VMALLOC_END;
 #define pmd_clear(pmdp)(pmd_val(*(pmdp)) = 0UL)
 #define pmd_page_vaddr(pmd)((unsigned long) __va(pmd_val(pmd) & 
_PFN_MASK))
 #define pmd_page(pmd)  virt_to_page((pmd_val(pmd) + 
PAGE_OFFSET))
+#define pmd_pfn(pmd)   (pmd_val(pmd) >> PAGE_SHIFT)
 
 #define pud_none(pud)  (!pud_val(pud))
 #define pud_bad(pud)   (!ia64_phys_addr_valid(pud_val(pud)))
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 84d1346e1cda..d29dc7b4924b 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -461,7 +461,7 @@ static inline int pte_present(pte_t a)
 #define pte_devmap pte_devmap
 static inline int pte_devmap(pte_t a)
 {
-   return pte_flags(a) & _PAGE_DEVMAP;
+   return (pte_flags(a) & _PAGE_DEVMAP) == _PAGE_DEVMAP;
 }
 
 #define pte_accessible pte_accessible
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index 81bf3d2af3eb..7254ba4f791d 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -63,6 +63,16 @@ retry:
 #endif
 }
 
+static void undo_dev_pagemap(int *nr, int nr_start, struct page **pages)
+{
+   while ((*nr) - nr_start) {
+   struct page *page = pages[--(*nr)];
+
+   ClearPageReferenced(page);
+   put_page(page);
+   }
+}
+
 /*
  * The performance critical leaf functions are made noinline otherwise gcc
  * inlines everything into a single function which results in too much
@@ -71,7 +81,9 @@ retry:
 static noinline int gup_pte_range(pmd_t pmd, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
 {
+   struct dev_pagemap *pgmap = NULL;
unsigned long mask;
+   int nr_start = *nr;
pte_t *ptep;
 
mask = _PAGE_PRESENT|_PAGE_USER;
@@ -89,13 +101,21 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long 
addr,
return 0;
}
 
-   if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) {
+   page = pte_page(pte);
+   if (pte_devmap(pte)) {
+   pgmap = get_dev_pagemap(pte_pfn(pte), pgmap);
+   if (unlikely(!pgmap)) {
+   undo_dev_pagemap(nr, nr_start, pages);
+   pte_unmap(ptep);
+   return 0;
+   }
+   } else if ((pte_flags(pte) & (mask | _PAGE_SPECIAL)) != mask) {
pte_unmap(ptep);
return 0;
}
VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
-   page = pte_page(pte);
get_page(page);
+   put_dev_pagemap(pgmap);
SetPageReferenced(page);
pages[*nr] = page;
(*nr)++;
@@ -114,6 +134,32 @@ static inline void get_head_page_multiple(struct page 
*page, int nr)
SetPageReferenced(page);
 }
 
+static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr,
+   unsigned long end, struct page **pages, int *nr)
+{
+   int nr_start = *nr;
+   unsigned long pfn = pmd_pfn(pmd);
+   struct dev_pagemap *pgmap = NULL;
+
+   pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT;
+   do {
+   struct page *page = pfn_to_page(pfn);
+
+   pgmap = get_dev_pagemap(pfn, pgmap);
+   if (unlikely(!pgmap)) {
+   undo_dev_pagemap(nr, nr_start, pages);
+   return 0;
+   }
+   SetPageReferenced(page);
+   pages[*nr] = page;
+   

[PATCH 08/15] block, dax, pmem: reference counting infrastructure

2015-09-22 Thread Dan Williams
Enable DAX to use a reference count for keeping the virtual address
returned by ->direct_access() valid for the duration of its usage in
fs/dax.c, or otherwise hold off blk_cleanup_queue() while
pmem_make_request is active.  The blk-mq code is already in a position
to need low overhead reference counting for races against request_queue
destruction (blk_cleanup_queue()).  Given DAX-enabled block drivers do
not enable blk-mq, share the storage in 'struct request_queue' between
the two implementations.

Cc: Jens Axboe 
Cc: Christoph Hellwig 
Cc: Ross Zwisler 
Signed-off-by: Dan Williams 
---
 arch/powerpc/sysdev/axonram.c |2 -
 block/blk-core.c  |   84 +
 block/blk-mq-sysfs.c  |2 -
 block/blk-mq.c|   48 ++-
 block/blk-sysfs.c |9 
 block/blk.h   |3 +
 drivers/block/brd.c   |2 -
 drivers/nvdimm/pmem.c |3 +
 drivers/s390/block/dcssblk.c  |2 -
 include/linux/blkdev.h|   20 --
 10 files changed, 130 insertions(+), 45 deletions(-)

diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index d2b79bc336c1..24ffab2572e8 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -228,7 +228,7 @@ static int axon_ram_probe(struct platform_device *device)
sprintf(bank->disk->disk_name, "%s%d",
AXON_RAM_DEVICE_NAME, axon_ram_bank_id);
 
-   bank->disk->queue = blk_alloc_queue(GFP_KERNEL);
+   bank->disk->queue = blk_dax_init_queue(NUMA_NO_NODE);
if (bank->disk->queue == NULL) {
dev_err(>dev, "Cannot register disk queue\n");
rc = -EFAULT;
diff --git a/block/blk-core.c b/block/blk-core.c
index 2eb722d48773..13764f8b22e0 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -26,6 +26,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -497,6 +498,84 @@ void blk_queue_bypass_end(struct request_queue *q)
 }
 EXPORT_SYMBOL_GPL(blk_queue_bypass_end);
 
+int blk_qref_enter(struct request_queue_ref *qref, gfp_t gfp)
+{
+   struct request_queue *q = container_of(qref, typeof(*q), mq_ref);
+
+   while (true) {
+   int ret;
+
+   if (percpu_ref_tryget_live(>count))
+   return 0;
+
+   if (!(gfp & __GFP_WAIT))
+   return -EBUSY;
+
+   ret = wait_event_interruptible(qref->freeze_wq,
+   !atomic_read(>freeze_depth) ||
+   blk_queue_dying(q));
+   if (blk_queue_dying(q))
+   return -ENODEV;
+   if (ret)
+   return ret;
+   }
+}
+
+void blk_qref_release(struct percpu_ref *ref)
+{
+   struct request_queue_ref *qref = container_of(ref, typeof(*qref), 
count);
+
+   wake_up_all(>freeze_wq);
+}
+
+int blk_dax_get(struct request_queue *q)
+{
+   return blk_qref_enter(>dax_ref, GFP_NOWAIT);
+}
+
+void blk_dax_put(struct request_queue *q)
+{
+   percpu_ref_put(>dax_ref.count);
+}
+
+static void blk_dax_freeze(struct request_queue *q)
+{
+   if (!blk_queue_dax(q))
+   return;
+
+   if (atomic_inc_return(>dax_ref.freeze_depth) == 1)
+   percpu_ref_kill(>dax_ref.count);
+
+   wait_event(q->dax_ref.freeze_wq, percpu_ref_is_zero(>dax_ref.count));
+}
+
+struct request_queue *blk_dax_init_queue(int nid)
+{
+   struct request_queue *q;
+   int rc;
+
+   q = blk_alloc_queue_node(GFP_KERNEL, nid);
+   if (!q)
+   return ERR_PTR(-ENOMEM);
+   queue_flag_set_unlocked(QUEUE_FLAG_DAX, q);
+
+   rc = percpu_ref_init(>dax_ref.count, blk_qref_release, 0,
+   GFP_KERNEL);
+   if (rc) {
+   blk_cleanup_queue(q);
+   return ERR_PTR(rc);
+   }
+   return q;
+}
+EXPORT_SYMBOL(blk_dax_init_queue);
+
+static void blk_dax_exit(struct request_queue *q)
+{
+   if (!blk_queue_dax(q))
+   return;
+   percpu_ref_exit(>dax_ref.count);
+}
+
 void blk_set_queue_dying(struct request_queue *q)
 {
queue_flag_set_unlocked(QUEUE_FLAG_DYING, q);
@@ -558,6 +637,7 @@ void blk_cleanup_queue(struct request_queue *q)
blk_mq_freeze_queue(q);
spin_lock_irq(lock);
} else {
+   blk_dax_freeze(q);
spin_lock_irq(lock);
__blk_drain_queue(q, true);
}
@@ -570,6 +650,7 @@ void blk_cleanup_queue(struct request_queue *q)
 
if (q->mq_ops)
blk_mq_free_queue(q);
+   blk_dax_exit(q);
 
spin_lock_irq(lock);
if (q->queue_lock != >__queue_lock)
@@ -688,7 +769,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, 
int node_id)
q->bypass_depth = 1;
__set_bit(QUEUE_FLAG_BYPASS, >queue_flags);
 
-   

[PATCH 13/15] mm, dax: convert vmf_insert_pfn_pmd() to __pfn_t

2015-09-22 Thread Dan Williams
Similar to the conversion of vm_insert_mixed() use __pfn_t in the
vmf_insert_pfn_pmd() to tag the resulting pte with _PAGE_DEVMAP when the
pfn is backed by a devm_memremap_pages() mapping.

Cc: Dave Hansen 
Cc: Andrew Morton 
Cc: Matthew Wilcox 
Cc: Alexander Viro 
Signed-off-by: Dan Williams 
---
 arch/sparc/include/asm/pgtable_64.h |2 ++
 arch/x86/include/asm/pgtable.h  |6 ++
 arch/x86/mm/pat.c   |4 ++--
 fs/dax.c|2 +-
 include/asm-generic/pgtable.h   |6 --
 include/linux/huge_mm.h |2 +-
 include/linux/mm.h  |   27 +--
 include/linux/pfn.h |9 +
 mm/huge_memory.c|   10 ++
 mm/memory.c |2 +-
 10 files changed, 49 insertions(+), 21 deletions(-)

diff --git a/arch/sparc/include/asm/pgtable_64.h 
b/arch/sparc/include/asm/pgtable_64.h
index 131d36fcd07a..496ef783c68c 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -234,6 +234,7 @@ extern struct page *mem_map_zero;
  * the first physical page in the machine is at some huge physical address,
  * such as 4GB.   This is common on a partitioned E1, for example.
  */
+#define pfn_pte pfn_pte
 static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot)
 {
unsigned long paddr = pfn << PAGE_SHIFT;
@@ -244,6 +245,7 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t 
prot)
 #define mk_pte(page, pgprot)   pfn_pte(page_to_pfn(page), (pgprot))
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define pfn_pmd pfn_pmd
 static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
 {
pte_t pte = pfn_pte(page_nr, pgprot);
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 02a54e5b7930..84d1346e1cda 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -282,6 +282,11 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd)
return pmd_set_flags(pmd, _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
 }
 
+static inline pmd_t pmd_mkdevmap(pmd_t pmd)
+{
+   return pmd_set_flags(pmd, _PAGE_DEVMAP);
+}
+
 static inline pmd_t pmd_mkhuge(pmd_t pmd)
 {
return pmd_set_flags(pmd, _PAGE_PSE);
@@ -346,6 +351,7 @@ static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t 
pgprot)
 massage_pgprot(pgprot));
 }
 
+#define pfn_pmd pfn_pmd
 static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
 {
return __pmd(((phys_addr_t)page_nr << PAGE_SHIFT) |
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 188e3e07eeeb..2e02064dbe45 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -949,7 +949,7 @@ int track_pfn_remap(struct vm_area_struct *vma, pgprot_t 
*prot,
 }
 
 int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
-unsigned long pfn)
+__pfn_t pfn)
 {
enum page_cache_mode pcm;
 
@@ -957,7 +957,7 @@ int track_pfn_insert(struct vm_area_struct *vma, pgprot_t 
*prot,
return 0;
 
/* Set prot based on lookup */
-   pcm = lookup_memtype((resource_size_t)pfn << PAGE_SHIFT);
+   pcm = lookup_memtype(__pfn_t_to_phys(pfn));
*prot = __pgprot((pgprot_val(vma->vm_page_prot) & (~_PAGE_CACHE_MASK)) |
 cachemode2protval(pcm));
 
diff --git a/fs/dax.c b/fs/dax.c
index b93dbf363dc2..321966335f33 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -681,7 +681,7 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned 
long address,
goto fallback;
 
result |= vmf_insert_pfn_pmd(vma, address, pmd,
-   __pfn_t_to_pfn(pfn), write);
+   pfn, write);
}
 
  out:
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 29c57b2cb344..a65f86061563 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_GENERIC_PGTABLE_H
 #define _ASM_GENERIC_PGTABLE_H
 
+#include 
+
 #ifndef __ASSEMBLY__
 #ifdef CONFIG_MMU
 
@@ -521,7 +523,7 @@ static inline int track_pfn_remap(struct vm_area_struct 
*vma, pgprot_t *prot,
  * by vm_insert_pfn().
  */
 static inline int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
-  unsigned long pfn)
+  __pfn_t pfn)
 {
return 0;
 }
@@ -549,7 +551,7 @@ extern int track_pfn_remap(struct vm_area_struct *vma, 
pgprot_t *prot,
   unsigned long pfn, unsigned long addr,
   unsigned long size);
 extern int track_pfn_insert(struct vm_area_struct *vma, pgprot_t *prot,
-   unsigned long pfn);
+   __pfn_t pfn);
 extern int track_pfn_copy(struct vm_area_struct *vma);
 extern void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
  

[PATCH 04/15] x86, mm: quiet arch_add_memory()

2015-09-22 Thread Dan Williams
Switch to pr_debug() so that dynamic-debug can disable these messages by
default.  This gets noisy in the presence of devm_memremap_pages().

Signed-off-by: Dan Williams 
---
 arch/x86/mm/init.c|4 ++--
 arch/x86/mm/init_64.c |4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 1d8a83df153a..4b9ea3f27de4 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -354,7 +354,7 @@ static int __meminit split_mem_range(struct map_range *mr, 
int nr_range,
}
 
for (i = 0; i < nr_range; i++)
-   printk(KERN_DEBUG " [mem %#010lx-%#010lx] page %s\n",
+   pr_debug(" [mem %#010lx-%#010lx] page %s\n",
mr[i].start, mr[i].end - 1,
page_size_string([i]));
 
@@ -401,7 +401,7 @@ unsigned long __init_refok init_memory_mapping(unsigned 
long start,
unsigned long ret = 0;
int nr_range, i;
 
-   pr_info("init_memory_mapping: [mem %#010lx-%#010lx]\n",
+   pr_debug("init_memory_mapping: [mem %#010lx-%#010lx]\n",
   start, end - 1);
 
memset(mr, 0, sizeof(mr));
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 30564e2752d3..bf827f231470 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1268,7 +1268,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned 
long start,
/* check to see if we have contiguous blocks */
if (p_end != p || node_start != node) {
if (p_start)
-   printk(KERN_DEBUG " [%lx-%lx] 
PMD -> [%p-%p] on node %d\n",
+   pr_debug(" [%lx-%lx] PMD -> 
[%p-%p] on node %d\n",
   addr_start, addr_end-1, 
p_start, p_end-1, node_start);
addr_start = addr;
node_start = node;
@@ -1366,7 +1366,7 @@ void register_page_bootmem_memmap(unsigned long 
section_nr,
 void __meminit vmemmap_populate_print_last(void)
 {
if (p_start) {
-   printk(KERN_DEBUG " [%lx-%lx] PMD -> [%p-%p] on node %d\n",
+   pr_debug(" [%lx-%lx] PMD -> [%p-%p] on node %d\n",
addr_start, addr_end-1, p_start, p_end-1, node_start);
p_start = NULL;
p_end = NULL;

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 01/15] avr32: convert to asm-generic/memory_model.h

2015-09-22 Thread Dan Williams
Switch avr32/include/asm/page.h to use the common definitions for
pfn_to_page(), page_to_pfn(), and ARCH_PFN_OFFSET.

Signed-off-by: Dan Williams 
---
 arch/avr32/include/asm/page.h |8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/avr32/include/asm/page.h b/arch/avr32/include/asm/page.h
index f805d1cb11bc..c5d2a3e2c62f 100644
--- a/arch/avr32/include/asm/page.h
+++ b/arch/avr32/include/asm/page.h
@@ -83,11 +83,9 @@ static inline int get_order(unsigned long size)
 
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 
-#define PHYS_PFN_OFFSET(CONFIG_PHYS_OFFSET >> PAGE_SHIFT)
+#define ARCH_PFN_OFFSET(CONFIG_PHYS_OFFSET >> PAGE_SHIFT)
 
-#define pfn_to_page(pfn)   (mem_map + ((pfn) - PHYS_PFN_OFFSET))
-#define page_to_pfn(page)  ((unsigned long)((page) - mem_map) + 
PHYS_PFN_OFFSET)
-#define pfn_valid(pfn) ((pfn) >= PHYS_PFN_OFFSET && (pfn) < 
(PHYS_PFN_OFFSET + max_mapnr))
+#define pfn_valid(pfn) ((pfn) >= ARCH_PFN_OFFSET && (pfn) < 
(ARCH_PFN_OFFSET + max_mapnr))
 #endif /* CONFIG_NEED_MULTIPLE_NODES */
 
 #define virt_to_page(kaddr)pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
@@ -101,4 +99,6 @@ static inline int get_order(unsigned long size)
  */
 #define HIGHMEM_START  0x2000UL
 
+#include 
+
 #endif /* __ASM_AVR32_PAGE_H */

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 07/15] devm_memremap: convert to return ERR_PTR

2015-09-22 Thread Dan Williams
Make devm_memremap consistent with the error return scheme of
devm_memremap_pages to remove special casing in the pmem driver.

Cc: Christoph Hellwig 
Cc: Ross Zwisler 
Signed-off-by: Dan Williams 
---
 drivers/nvdimm/pmem.c |   16 ++--
 kernel/memremap.c |2 +-
 2 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 0680affae04a..9805d311b1d1 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -150,19 +150,15 @@ static struct pmem_device *pmem_alloc(struct device *dev,
return ERR_PTR(-EBUSY);
}
 
-   if (pmem_should_map_pages(dev)) {
-   void *addr = devm_memremap_pages(dev, res);
-
-   if (IS_ERR(addr))
-   return addr;
-   pmem->virt_addr = (void __pmem *) addr;
-   } else {
+   if (pmem_should_map_pages(dev))
+   pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res);
+   else
pmem->virt_addr = (void __pmem *) devm_memremap(dev,
pmem->phys_addr, pmem->size,
ARCH_MEMREMAP_PMEM);
-   if (!pmem->virt_addr)
-   return ERR_PTR(-ENXIO);
-   }
+
+   if (IS_ERR(pmem->virt_addr))
+   return (void __force *) pmem->virt_addr;
 
return pmem;
 }
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 0756273437e0..0d818ce04129 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -116,7 +116,7 @@ void *devm_memremap(struct device *dev, resource_size_t 
offset,
 
ptr = devres_alloc(devm_memremap_release, sizeof(*ptr), GFP_KERNEL);
if (!ptr)
-   return NULL;
+   return ERR_PTR(-ENOMEM);
 
addr = memremap(offset, size, flags);
if (addr) {

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 05/15] pmem: kill memremap_pmem()

2015-09-22 Thread Dan Williams
Now that the pmem-api is defined as "a set of apis that enables access
to WB mapped pmem",  the mapping type is implied.  Remove the wrapper
and push the functionality down into the pmem driver in preparation for
adding support for direct-mapped pmem.

Signed-off-by: Dan Williams 
---
 drivers/nvdimm/pmem.c |9 +
 include/linux/pmem.h  |   26 +-
 2 files changed, 6 insertions(+), 29 deletions(-)

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 0ba6a978f227..0680affae04a 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -157,8 +157,9 @@ static struct pmem_device *pmem_alloc(struct device *dev,
return addr;
pmem->virt_addr = (void __pmem *) addr;
} else {
-   pmem->virt_addr = memremap_pmem(dev, pmem->phys_addr,
-   pmem->size);
+   pmem->virt_addr = (void __pmem *) devm_memremap(dev,
+   pmem->phys_addr, pmem->size,
+   ARCH_MEMREMAP_PMEM);
if (!pmem->virt_addr)
return ERR_PTR(-ENXIO);
}
@@ -363,8 +364,8 @@ static int nvdimm_namespace_attach_pfn(struct 
nd_namespace_common *ndns)
 
/* establish pfn range for lookup, and switch to direct map */
pmem = dev_get_drvdata(dev);
-   memunmap_pmem(dev, pmem->virt_addr);
-   pmem->virt_addr = (void __pmem *)devm_memremap_pages(dev, >res);
+   devm_memunmap(dev, (void __force *) pmem->virt_addr);
+   pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, >res);
if (IS_ERR(pmem->virt_addr)) {
rc = PTR_ERR(pmem->virt_addr);
goto err;
diff --git a/include/linux/pmem.h b/include/linux/pmem.h
index 85f810b33917..acfea8ce4a07 100644
--- a/include/linux/pmem.h
+++ b/include/linux/pmem.h
@@ -65,11 +65,6 @@ static inline void memcpy_from_pmem(void *dst, void __pmem 
const *src, size_t si
memcpy(dst, (void __force const *) src, size);
 }
 
-static inline void memunmap_pmem(struct device *dev, void __pmem *addr)
-{
-   devm_memunmap(dev, (void __force *) addr);
-}
-
 static inline bool arch_has_pmem_api(void)
 {
return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API);
@@ -93,7 +88,7 @@ static inline bool arch_has_wmb_pmem(void)
  * These defaults seek to offer decent performance and minimize the
  * window between i/o completion and writes being durable on media.
  * However, it is undefined / architecture specific whether
- * default_memremap_pmem + default_memcpy_to_pmem is sufficient for
+ * ARCH_MEMREMAP_PMEM + default_memcpy_to_pmem is sufficient for
  * making data durable relative to i/o completion.
  */
 static inline void default_memcpy_to_pmem(void __pmem *dst, const void *src,
@@ -117,25 +112,6 @@ static inline void default_clear_pmem(void __pmem *addr, 
size_t size)
 }
 
 /**
- * memremap_pmem - map physical persistent memory for pmem api
- * @offset: physical address of persistent memory
- * @size: size of the mapping
- *
- * Establish a mapping of the architecture specific memory type expected
- * by memcpy_to_pmem() and wmb_pmem().  For example, it may be
- * the case that an uncacheable or writethrough mapping is sufficient,
- * or a writeback mapping provided memcpy_to_pmem() and
- * wmb_pmem() arrange for the data to be written through the
- * cache to persistent media.
- */
-static inline void __pmem *memremap_pmem(struct device *dev,
-   resource_size_t offset, unsigned long size)
-{
-   return (void __pmem *) devm_memremap(dev, offset, size,
-   ARCH_MEMREMAP_PMEM);
-}
-
-/**
  * memcpy_to_pmem - copy data to persistent memory
  * @dst: destination buffer for the copy
  * @src: source buffer for the copy

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 03/15] frv: fix compiler warning from definition of __pmd()

2015-09-22 Thread Dan Williams
Take into account that the pmd_t type is an array inside a struct, so it
needs two levels of brackets to initialize.  Otherwise, a usage of __pmd
generates a warning:

include/linux/mm.h:986:2: warning: missing braces around initializer 
[-Wmissing-braces]

Signed-off-by: Dan Williams 
---
 arch/frv/include/asm/page.h |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/frv/include/asm/page.h b/arch/frv/include/asm/page.h
index 8c97068ac8fc..688d8076a43a 100644
--- a/arch/frv/include/asm/page.h
+++ b/arch/frv/include/asm/page.h
@@ -34,7 +34,7 @@ typedef struct page *pgtable_t;
 #define pgprot_val(x)  ((x).pgprot)
 
 #define __pte(x)   ((pte_t) { (x) } )
-#define __pmd(x)   ((pmd_t) { (x) } )
+#define __pmd(x)   ((pmd_t) { { (x) } } )
 #define __pud(x)   ((pud_t) { (x) } )
 #define __pgd(x)   ((pgd_t) { (x) } )
 #define __pgprot(x)((pgprot_t) { (x) } )

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 00/15] get_user_pages() for dax mappings

2015-09-22 Thread Dan Williams
To date, we have implemented two I/O usage models for persistent memory,
PMEM (a persistent "ram disk") and DAX (mmap persistent memory into
userspace).  This series adds a third, DAX-GUP, that allows DAX mappings
to be the target of direct-i/o.  It allows userspace to coordinate
DMA/RDMA from/to persistent memory.

The implementation leverages the ZONE_DEVICE mm-zone that went into
4.3-rc1 to flag pages that are owned and dynamically mapped by a device
driver.  The pmem driver, after mapping a persistent memory range into
the system memmap via devm_memremap_pages(), arranges for DAX to
distinguish pfn-only versus page-backed pmem-pfns via flags in the new
__pfn_t type.  The DAX code, upon seeing a PFN_DEV+PFN_MAP flagged pfn,
flags the resulting pte(s) inserted into the process page tables with a
new _PAGE_DEVMAP flag.  Later, when get_user_pages() is walking ptes it
keys off _PAGE_DEVMAP to pin the device hosting the page range active.
Finally, get_page() and put_page() are modified to take references
against the device driver established page mapping.

Next step: more testing, specifically DAX-get_user_pages() vs truncate.

Patches 1 - 3 are general compilation fixups from 0day-kbuild reports
while developing this series.

Patches 4 - 7 are minor cleanups and reworks of the devm_memremap_* api.

Patches 8 - 10 add a reference counter for pinning the pmem driver
active while it is in use.  It turns out, prior to these changes, you
can reliably crash the kernel on shutdown if the pmem device is unbound
while hosting a mounted filesystem.

Patches 11 - 15 use __pfn_t and the _PAGE_DEVMAP flag to implement the
dax-gup path.

This series is built on 4.3-rc2 plus the __dax_pmd_fault fix from Ross:
https://patchwork.kernel.org/patch/7244961/

---

Dan Williams (15):
  avr32: convert to asm-generic/memory_model.h
  hugetlb: fix compile error on tile
  frv: fix compiler warning from definition of __pmd()
  x86, mm: quiet arch_add_memory()
  pmem: kill memremap_pmem()
  devm_memunmap: use devres_release()
  devm_memremap: convert to return ERR_PTR
  block, dax, pmem: reference counting infrastructure
  block, pmem: fix null pointer de-reference on shutdown, check for queue 
death
  block, dax: fix lifetime of in-kernel dax mappings
  mm, dax, pmem: introduce __pfn_t
  mm, dax, gpu: convert vm_insert_mixed to __pfn_t, introduce _PAGE_DEVMAP
  mm, dax: convert vmf_insert_pfn_pmd() to __pfn_t
  mm, dax, pmem: introduce {get|put}_dev_pagemap() for dax-gup
  mm, x86: get_user_pages() for dax mappings


 arch/alpha/include/asm/pgtable.h|1 
 arch/avr32/include/asm/page.h   |8 +
 arch/frv/include/asm/page.h |2 
 arch/ia64/include/asm/pgtable.h |1 
 arch/m68k/include/asm/page_no.h |1 
 arch/parisc/include/asm/pgtable.h   |1 
 arch/powerpc/include/asm/pgtable.h  |1 
 arch/powerpc/sysdev/axonram.c   |   10 +
 arch/sparc/include/asm/pgtable_64.h |2 
 arch/tile/include/asm/pgtable.h |1 
 arch/um/include/asm/pgtable-3level.h|1 
 arch/x86/include/asm/pgtable.h  |   24 
 arch/x86/include/asm/pgtable_types.h|7 +
 arch/x86/mm/gup.c   |   56 
 arch/x86/mm/init.c  |4 -
 arch/x86/mm/init_64.c   |4 -
 arch/x86/mm/pat.c   |4 -
 block/blk-core.c|   86 -
 block/blk-mq-sysfs.c|2 
 block/blk-mq.c  |   48 ++-
 block/blk-sysfs.c   |9 +
 block/blk.h |3 
 drivers/block/brd.c |6 -
 drivers/gpu/drm/exynos/exynos_drm_gem.c |3 
 drivers/gpu/drm/gma500/framebuffer.c|2 
 drivers/gpu/drm/msm/msm_gem.c   |3 
 drivers/gpu/drm/omapdrm/omap_gem.c  |6 +
 drivers/gpu/drm/ttm/ttm_bo_vm.c |3 
 drivers/nvdimm/pmem.c   |   57 +---
 drivers/s390/block/dcssblk.c|   12 +-
 fs/block_dev.c  |2 
 fs/dax.c|  140 +---
 include/asm-generic/pgtable.h   |6 +
 include/linux/blkdev.h  |   24 +++-
 include/linux/huge_mm.h |2 
 include/linux/hugetlb.h |1 
 include/linux/io.h  |   17 --
 include/linux/mm.h  |  212 +--
 include/linux/mm_types.h|6 +
 include/linux/pfn.h |9 +
 include/linux/pmem.h|   26 
 kernel/memremap.c   |   78 +++
 mm/gup.c|   11 +-
 mm/huge_memory.c|   10 +
 mm/hugetlb.c|   18 ++-
 mm/memory.c |   17 +-
 mm/swap.c   

Apple Keyboard (SPI) Driver

2015-09-22 Thread Aleksa Sarai
AFAICS, nobody is working on writing a driver for the MacBook8,1
keyboard (which uses SPI as opposed to USB). Oddly, the vendor ID and
product ID are the same as the USB counterpart. Would it be possible
to port the logic of the hid-apple driver to SPI, or are the two
serial protocols too different to make this a worthwhile activity? In
either case, is there some information on how the hid-apple driver was
written (was the logic from the open source apple driver adapted, or
were they written from scratch?) and if a similar technique could be
applied to creating an SPI driver?

Cheers.

-- 
Aleksa Sarai (cyphar)
www.cyphar.com
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Xen-devel] [PATCH RFC] xen: if on Xen, "flatten" the scheduling domain hierarchy

2015-09-22 Thread Juergen Gross

On 09/22/2015 06:22 PM, George Dunlap wrote:

On 09/22/2015 05:42 AM, Juergen Gross wrote:

One other thing I just discovered: there are other consumers of the
topology sibling masks (e.g. topology_sibling_cpumask()) as well.

I think we would want to avoid any optimizations based on those in
drivers as well, not only in the scheduler.


I'm beginning to lose the thread of the discussion here a bit.

Juergen / Dario, could one of you summarize your two approaches, and the
(alleged) advantages and disadvantages of each one?


Okay, I'll have a try:

The problem we want to solve:
-----------------------------

The Linux kernel is gathering cpu topology data during boot via the
CPUID instruction on each processor coming online. This data is
primarily used in the scheduler to decide to which cpu a thread should
be migrated when this seems to be necessary. There are other users of
the topology information in the kernel (e.g. some drivers try to do
optimizations like core-specific queues/lists).

When started in a virtualized environment the obtained data is next to
useless or even wrong, as it is reflecting only the status at the time
of booting the system. Scheduling of the (v)cpus done by the hypervisor
is changing the topology beneath the feet of the Linux kernel without
reflecting this in the gathered topology information. So any decisions
taken based on that data will be clueless and possibly just wrong.

The minimal solution is to change the topology data in the kernel in a
way that all cpus are regarded as equal regarding their relation to each
other (e.g. when migrating a thread to another cpu no cpu is preferred
as a target).

The topology information of the CPUID instruction is, however, even
accessible from user mode and might be used for licensing purposes of
any user program (e.g. by limiting the software to run on a specific
number of cores or sockets). So just mangling the data returned by
CPUID in the hypervisor seems not to be a general solution, while we
might want to do it at least optionally in the future.

In the future we might want to support either dynamic topology updates
or be able to tell the kernel to use some of the topology data, e.g.
when pinning vcpus.


Solution 1 (Dario):
-------------------

Don't use the CPUID derived topology information in the Linux scheduler,
but let it use a simple "flat" topology by setting own scheduler domain
data under Xen.

Advantages:
+ very clean solution regarding the scheduler interface
+ scheduler decisions are based on a minimal data set
+ small patch

Disadvantages:
- covers the scheduler only, drivers still use the "wrong" data
- a little bit hacky regarding some NUMA architectures (needs either a
  hook in the code dealing with that architecture or multiple scheduler
  domain data overwrites)
- future enhancements will make the solution less clean (either need
  duplicating scheduler domain data or some new hooks in scheduler
  domain interface)


Solution 2 (Juergen):
---------------------

When booted as a Xen guest modify the topology data built during boot
resulting in the same simple "flat" topology as in Dario's solution.

Advantages:
+ the simple topology is seen by all consumers of topology data as the
  data itself is modified accordingly
+ small patch
+ future enhancements rather easy by selecting which data to modify

Disadvantages:
- interface to scheduler not as clean as in Dario's approach
- scheduler decisions are based on multiple layers of topology data
  where one layer would be enough to describe the topology


Dario, are you okay with this summary?

Juergen
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH RESEND] drivers: Update help for firmware loading options

2015-09-22 Thread Eric Biggers
Update the help text to account for the fact that the kernel can now load
firmware directly from the filesystem.  Also clarify how the FW_LOADER
option works.

Signed-off-by: Eric Biggers 
---
 drivers/base/Kconfig | 33 +
 1 file changed, 21 insertions(+), 12 deletions(-)

diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 98504ec..2150f6c 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -78,12 +78,21 @@ config PREVENT_FIRMWARE_BUILD
  If unsure, say Y here.
 
 config FW_LOADER
-   tristate "Userspace firmware loading support" if EXPERT
+   tristate "Firmware loading support" if EXPERT
default y
---help---
- This option is provided for the case where none of the in-tree modules
- require userspace firmware loading support, but a module built
- out-of-tree does.
+ If selected, the kernel will be able to load firmware when asked to do
+ so by device drivers.  Firmware is usually located in a known location
+ on the filesystem such as /lib/firmware/ and is loaded by the kernel
+ on request.  Firmware can also be compiled into the kernel or loaded
+ via a (deprecated) user-mode helper.
+
+ This option is automatically selected by in-tree modules that need
+ firmware loading support.  If no in-tree modules need it, then you can
+ choose N, unless you have out-of-tree modules that need it in which
+ case you will need to choose M or Y.
+
+ If unsure, say Y.
 
 config FIRMWARE_IN_KERNEL
bool "Include in-kernel firmware blobs in kernel binary"
@@ -95,20 +104,20 @@ config FIRMWARE_IN_KERNEL
  use these is to run "make firmware_install", which, after
  converting ihex files to binary, copies all of the needed
  binary files in firmware/ to /lib/firmware/ on your system so
- that they can be loaded by userspace helpers on request.
+ that they can be loaded by the kernel on request.
 
  Enabling this option will build each required firmware blob
  into the kernel directly, where request_firmware() will find
- them without having to call out to userspace. This may be
+ them without having to call out to the filesystem. This may be
  useful if your root file system requires a device that uses
- such firmware and do not wish to use an initrd.
+ such firmware and you do not wish to use an initrd.
 
  This single option controls the inclusion of firmware for
  every driver that uses request_firmware() and ships its
  firmware in the kernel source tree, which avoids a
  proliferation of 'Include firmware for xxx device' options.
 
- Say 'N' and let firmware be loaded from userspace.
+ Say 'N' and let firmware be loaded from the filesystem.
 
 config EXTRA_FIRMWARE
string "External firmware blobs to build into the kernel binary"
@@ -116,9 +125,9 @@ config EXTRA_FIRMWARE
help
  This option allows firmware to be built into the kernel for the case
  where the user either cannot or doesn't want to provide it from
- userspace at runtime (for example, when the firmware in question is
- required for accessing the boot device, and the user doesn't want to
- use an initrd).
+ the filesystem at runtime (for example, when the firmware in question
+ is required for accessing the boot device, and the user doesn't want
+ to use an initrd).
 
  This option is a string and takes the (space-separated) names of the
  firmware files -- the same names that appear in MODULE_FIRMWARE()
@@ -129,7 +138,7 @@ config EXTRA_FIRMWARE
  For example, you might set CONFIG_EXTRA_FIRMWARE="usb8388.bin", copy
  the usb8388.bin file into the firmware directory, and build the 
kernel.
  Then any request_firmware("usb8388.bin") will be satisfied internally
- without needing to call out to userspace.
+ without needing to call out to the filesystem.
 
  WARNING: If you include additional firmware files into your binary
  kernel image that are not available under the terms of the GPL,
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] ARM: dts: sun8i-a33: Add security system crypto engine clock and device nodes

2015-09-22 Thread Chen-Yu Tsai
A33 has the same "Security System" crypto engine as A10/A20, but with a
separate reset control.

Signed-off-by: Chen-Yu Tsai 
---
 arch/arm/boot/dts/sun8i-a33.dtsi | 18 ++
 1 file changed, 18 insertions(+)

diff --git a/arch/arm/boot/dts/sun8i-a33.dtsi b/arch/arm/boot/dts/sun8i-a33.dtsi
index 3457edb3bf50..001d8402ca18 100644
--- a/arch/arm/boot/dts/sun8i-a33.dtsi
+++ b/arch/arm/boot/dts/sun8i-a33.dtsi
@@ -99,6 +99,14 @@
"ahb1_sat";
};
 
+   ss_clk: clk@01c2009c {
+   #clock-cells = <0>;
+   compatible = "allwinner,sun4i-a10-mod0-clk";
+   reg = <0x01c2009c 0x4>;
+   clocks = <>, < 0>;
+   clock-output-names = "ss";
+   };
+
mbus_clk: clk@01c2015c {
#clock-cells = <0>;
compatible = "allwinner,sun8i-a23-mbus-clk";
@@ -109,6 +117,16 @@
};
 
soc@01c0 {
+   crypto: crypto-engine@01c15000 {
+   compatible = "allwinner,sun4i-a10-crypto";
+   reg = <0x01c15000 0x1000>;
+   interrupts = ;
+   clocks = <_gates 5>, <_clk>;
+   clock-names = "ahb", "mod";
+   resets = <_rst 5>;
+   reset-names = "ahb";
+   };
+
usb_otg: usb@01c19000 {
compatible = "allwinner,sun8i-a33-musb";
reg = <0x01c19000 0x0400>;
-- 
2.5.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


linux-next: Tree for Sep 23

2015-09-22 Thread Stephen Rothwell
Hi all,

Changes since 20150922:

The berlin tree gained a build failure so I used the version from
next-20150922.

I used the h8300 tree from next-20150828 since the current tree has been
rebased onto something very old :-(

The net-next tree lost its build failure.

The akpm tree gained a conflict against the drm-misc tree.

Non-merge commits (relative to Linus' tree): 2544
 2193 files changed, 117790 insertions(+), 31690 deletions(-)



I have created today's linux-next tree at
git://git.kernel.org/pub/scm/linux/kernel/git/next/linux-next.git
(patches at http://www.kernel.org/pub/linux/kernel/next/ ).  If you
are tracking the linux-next tree using git, you should not use "git pull"
to do so as that will try to merge the new linux-next release with the
old one.  You should use "git fetch" and checkout or reset to the new
master.

You can see which trees have been included by looking in the Next/Trees
file in the source.  There are also quilt-import.log and merge.log
files in the Next directory.  Between each merge, the tree was built
with a ppc64_defconfig for powerpc and an allmodconfig for x86_64,
a multi_v7_defconfig for arm and a native build of tools/perf. After
the final fixups (if any), it is also built with powerpc allnoconfig
(32 and 64 bit), ppc44x_defconfig and allyesconfig (this fails its final
link) and i386, sparc, sparc64 and arm defconfig.

Below is a summary of the state of the merge.

I am currently merging 226 trees (counting Linus' and 33 trees of patches
pending for Linus' tree).

Stats about the size of the tree over time can be seen at
http://neuling.org/linux-next-size.html .

Status of my local build tests will be at
http://kisskb.ellerman.id.au/linux-next .  If maintainers want to give
advice about cross compilers/configs that work, we are always open to add
more builds.

Thanks to Randy Dunlap for doing many randconfig builds.  And to Paul
Gortmaker for triage and bug fixes.

-- 
Cheers,
Stephen Rothwell    s...@canb.auug.org.au

$ git checkout master
$ git reset --hard stable
Merging origin/master (bcee19f424a0 Merge branch 'for-4.3-fixes' of 
git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup)
Merging fixes/master (c7e9ad7da219 Merge branch 'perf-urgent-for-linus' of 
git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip)
Merging kbuild-current/rc-fixes (3d1450d54a4f Makefile: Force gzip and xz on 
module install)
Merging arc-current/for-curr (e4140819dadc ARC: signal handling robustify)
Merging arm-current/fixes (7ae85dc7687c ARM: 8425/1: kgdb: Don't try to stop 
the machine when setting breakpoints)
Merging m68k-current/for-linus (1ecb40643a9a m68k/bootinfo: Use kmemdup rather 
than duplicating its implementation)
Merging metag-fixes/fixes (0164a711c97b metag: Fix ioremap_wc/ioremap_cached 
build errors)
Merging mips-fixes/mips-fixes (1795cd9b3a91 Linux 3.16-rc5)
Merging powerpc-fixes/fixes (400c47d81ca3 powerpc32: memset: only use dcbz once 
cache is enabled)
Merging powerpc-merge-mpe/fixes (bc0195aad0da Linux 4.2-rc2)
Merging sparc/master (73958c651fbf sparc64: use ENTRY/ENDPROC in VISsave)
Merging net/master (29c6852602e2 inet: fix races in reqsk_queue_hash_req())
Merging ipsec/master (04a6b8bfee06 xfrm6: Fix ICMPv6 and MH header checks in 
_decode_session6)
Merging sound-current/for-linus (5ee20bc79246 ALSA: usb-audio: Change internal 
PCM order)
Merging pci-current/for-linus (6044546d5ee6 PCI: Clear IORESOURCE_UNSET when 
clipping a bridge window)
Merging wireless-drivers/master (c2e7204d180f tcp_cubic: do not set epoch_start 
in the future)
Merging driver-core.current/driver-core-linus (2110d70c5e58 cpu/cacheinfo: Fix 
teardown path)
Merging tty.current/tty-linus (f7a7651fcd40 tty: serial: Add missing module 
license for 8250_base.ko)
Merging usb.current/usb-linus (cc8e4fc0c3b5 xhci: init command timeout timer 
earlier to avoid deleting it uninitialized)
Merging usb-gadget-fixes/fixes (a66c275b3d5d usb: dwc3: gadget: Fix BUG in RT 
config)
Merging usb-serial-fixes/usb-linus (19ab6bc5674a USB: option: add ZTE PIDs)
Merging staging.current/staging-linus (74c600e36455 MAINTAINERS: Update email 
address for Martyn Welch)
Merging char-misc.current/char-misc-linus (50314035d6b1 Merge tag 
'extcon-fixes-for-4.3-rc3' of 
git://git.kernel.org/pub/scm/linux/kernel/git/chanwoo/extcon into 
char-misc-linus)
Merging input-current/for-linus (72d4736253af Input: uinput - fix crash when 
using ABS events)
Merging crypto-current/master (09185e2756a8 hwrng: xgene - fix handling 
platform_get_irq)
Merging ide/master (d681f1166919 ide: remove deprecated use of pci api)
Merging devicetree-current/devicetree/merge (f76502aa9140 of/dynamic: Fix test 
for PPC_PSERIES)
Merging rr-fixes/fixes (275d7d44d802 module: Fix locking in symbol_put_addr())
Merging vfio-fixes/for-linus (4bc94d5dc95d vfio: Fix lockdep issue)
Merging kselftest-fixes/fixes (ae7858180510 selftests: exec: rev

Re: [RFC PATCH v5 2/4] genirq: add irq_migrate_all_off_this_cpu() for cpu hotplug

2015-09-22 Thread Yang Yingliang



On 2015/9/23 2:54, Thomas Gleixner wrote:

On Sat, 19 Sep 2015, Yang Yingliang wrote:


Add irq_migrate_all_off_this_cpu() into kernel/irq/migration.c.


This doesn't make any sense at all.

You just reuse the existing file to stick your new code into it
without reusing a single bit in that file. Aside of that it's
unconditionally compiled, which means all existing users of
CONFIG_GENERIC_PENDING_IRQ are burdened with pointless code.

The right thing to do is:

Add that code to a new file: kernel/irq/cpuhotplug.c and make that
depend on CONFIG_GENERIC_IRQ_MIGRATION.


How about adding #ifdef CONFIG_CPU_HOTPLUG around the new code
in kernel/irq/migration.c?

Thanks,
Yang


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [4.3-rc1, regression] dax: hang on i_mmap_rwsem in generic/075

2015-09-22 Thread Ross Zwisler
On Wed, Sep 23, 2015 at 05:56:31AM +1000, Dave Chinner wrote:
> On Tue, Sep 22, 2015 at 01:06:45PM +0300, Kirill A. Shutemov wrote:
> > On Tue, Sep 22, 2015 at 01:05:55PM +1000, Dave Chinner wrote:
> > > Hi folks,
> > > 
> > > I'm seeing hangs like this when using DAX on XFS on 4.3-rc1 running
> > > xfstests generic/075 (fsx test):
> > > 
> > > # echo w > /proc/sysrq-trigger
> > > [71628.984872] sysrq: SysRq : Show Blocked State
> > > [71628.985988]   taskPC stack   pid father
> > > [71628.987635] fsx D 88043fd756c0 12824   520  32636 
> > > 0x
> > > [71628.989251]  88007f557ba8 0086 88042eb40580 
> > > 8803c8bcc180
> > > [71628.990645]  88007f558000 88041d748e80 88041d748e68 
> > > 
> > > [71628.992068]  fffe 88007f557bc0 81d855ca 
> > > 8803c8bcc180
> > > [71628.993639] Call Trace:
> > > [71628.994097]  [] schedule+0x3a/0x90
> > > [71628.994997]  [] rwsem_down_write_failed+0x141/0x340
> > > [71628.996197]  [] 
> > > call_rwsem_down_write_failed+0x13/0x20
> > > [71628.997548]  [] ? down_write+0x24/0x40
> > > [71628.998502]  [] __dax_fault+0x546/0x6c0
> > > [71628.999469]  [] ? xfs_get_blocks+0x20/0x20
> > > [71629.000515]  [] xfs_filemap_fault+0xc8/0xf0
> > > [71629.001668]  [] __do_fault+0x3d/0x80
> > > [71629.002589]  [] handle_mm_fault+0xb8a/0xfd0
> > > [71629.003620]  [] __do_page_fault+0x15f/0x420
> > > [71629.004680]  [] trace_do_page_fault+0x43/0x110
> > > [71629.005877]  [] do_async_page_fault+0x1a/0xa0
> > > [71629.006936]  [] async_page_fault+0x28/0x30
> > > 
> > > __dax_fault() gets stuck on this lock:
> > > 
> > > (gdb) l *(__dax_fault+0x546)
> > > 0x812110b6 is in __dax_fault (include/linux/fs.h:499).
> > > 494
> > > 495 int mapping_tagged(struct address_space *mapping, int tag);
> > > 496
> > > 497 static inline void i_mmap_lock_write(struct address_space 
> > > *mapping)
> > > 498 {
> > > 499 down_write(>i_mmap_rwsem);
> > > 500 }
> > > 501
> > > 502 static inline void i_mmap_unlock_write(struct address_space 
> > > *mapping)
> > > 503 {
> > > 
> > > This didn't happen on 4.2 + the XFS for-next code that was merged
> > > into 4.3-rc1, so it's come from changes somewhere else in the merge.
> > > I suspect either of these two commits:
> > > 
> > > 46c043e mm: take i_mmap_lock in unmap_mapping_range() for DAX
> > > 8431729 dax: fix race between simultaneous faults
> > > 
> > > as they both modified the i_mmap_lock usage for DAX page faults.
> > 
> > It's likely we broke some locking ordering rules, but it's not obvious for
> > me which one.
> > 
> > No lockdep complain? Or it's disabled?
> 
> Wasn't running lockdep, I don't always use it because of how slow it
> can make things. I'll turn it on and see what happens...

I just wanted to let you know that I was able to reproduce this on my test
setup and am planning on tracking it down tomorrow, unless someone gets to it
first.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 04/17] powerpc: mpic: use IRQCHIP_SKIP_SET_WAKE instead of redundant mpic_irq_set_wake

2015-09-22 Thread Scott Wood
On Mon, 2015-09-21 at 16:47 +0100, Sudeep Holla wrote:
> mpic_irq_set_wake return -ENXIO for non FSL MPIC and sets IRQF_NO_SUSPEND
> flag for FSL ones. enable_irq_wake already returns -ENXIO if irq_set_wake
> is not implemented. Also there's no need to set the IRQF_NO_SUSPEND flag
> as it doesn't guarantee wakeup for that interrupt.
> 
> This patch removes the redundant mpic_irq_set_wake and sets the
> IRQCHIP_SKIP_SET_WAKE for only FSL MPIC.
> 
> Cc: Benjamin Herrenschmidt 
> Cc: Paul Mackerras 
> Cc: Michael Ellerman 
> Cc: Scott Wood 
> Cc: Hongtao Jia 
> Cc: Marc Zyngier 
> Cc: linuxppc-...@lists.ozlabs.org
> Signed-off-by: Sudeep Holla 
> ---
>  arch/powerpc/sysdev/mpic.c | 23 ---
>  1 file changed, 4 insertions(+), 19 deletions(-)

Acked-by: Scott Wood 

-Scott

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v10 3/5] CPM/QE: use genalloc to manage CPM/QE muram

2015-09-22 Thread Scott Wood
On Tue, 2015-09-22 at 21:20 -0500, Zhao Qiang-B45475 wrote:
> On Wed, Sep 23, 2015 at 8:19 AM +0800, Wood Scott-B07421 wrote:
> 
> > > > >  {
> > > > > - int ret;
> > > > > +
> > > > > + unsigned long start;
> > > > >   unsigned long flags;
> > > > > + unsigned long size_alloc = size; struct muram_block *entry; int
> > > > > + end_bit; int order = muram_pool->min_alloc_order;
> > > > > 
> > > > >   spin_lock_irqsave(_muram_lock, flags);
> > > > > - ret = rh_free(_muram_info, offset);
> > > > > + end_bit = (offset >> order) + ((size + (1UL << order) - 1) >>
> > > > order);
> > > > > + if ((offset + size) > (end_bit << order))
> > > > > + size_alloc = size + (1UL << order);
> > > > 
> > > > Why do you need to do all these calculations here?
> > > 
> > > So do it in gen_pool_fixed_alloc?
> > 
> > Could you explain why they're needed at all?
> 
> Why it does the calculations? 
> If the min block of gen_pool is 8 bytes, and I want to allocate a 
> Region with offset=7, size=8bytes, I actually need block 0 and block 1,
> And the allocation will give me block 0.  

How can you have offset 7 if the minimum order is 2 bytes?

-Scott

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] memcg: make mem_cgroup_read_stat() unsigned

2015-09-22 Thread Andrew Morton
On Tue, 22 Sep 2015 17:42:13 -0700 Greg Thelen  wrote:

> Andrew Morton wrote:
> 
> > On Tue, 22 Sep 2015 15:16:32 -0700 Greg Thelen  wrote:
> >
> >> mem_cgroup_read_stat() returns a page count by summing per cpu page
> >> counters.  The summing is racy wrt. updates, so a transient negative sum
> >> is possible.  Callers don't want negative values:
> >> - mem_cgroup_wb_stats() doesn't want negative nr_dirty or nr_writeback.
> >> - oom reports and memory.stat shouldn't show confusing negative usage.
> >> - tree_usage() already avoids negatives.
> >>
> >> Avoid returning negative page counts from mem_cgroup_read_stat() and
> >> convert it to unsigned.
> >
> > Someone please remind me why this code doesn't use the existing
> > percpu_counter library which solved this problem years ago.
> >
> >>   for_each_possible_cpu(cpu)
> >
> > and which doesn't iterate across offlined CPUs.
> 
> I found [1] and [2] discussing memory layout differences between:
> a) existing memcg hand rolled per cpu arrays of counters
> vs
> b) array of generic percpu_counter
> The current approach was claimed to have lower memory overhead and
> better cache behavior.
> 
> I assume it's pretty straightforward to create generic
> percpu_counter_array routines which memcg could use.  Possibly something
> like this could be made general enough to satisfy
> vmstat, but that is less clear.
> 
> [1] http://www.spinics.net/lists/cgroups/msg06216.html
> [2] https://lkml.org/lkml/2014/9/11/1057

That all sounds rather bogus to me.  __percpu_counter_add() doesn't
modify struct percpu_counter at all except for when the cpu-local
counter overflows the configured batch size.  And for the memcg
application I suspect we can set the batch size to INT_MAX...


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH 04/17] powerpc: mpic: use IRQCHIP_SKIP_SET_WAKE instead of redundant mpic_irq_set_wake

2015-09-22 Thread Thomas Gleixner
On Wed, 23 Sep 2015, Wang Dongsheng wrote:
> > On Mon, 2015-09-21 at 16:47 +0100, Sudeep Holla wrote:
> > > mpic_irq_set_wake return -ENXIO for non FSL MPIC and sets IRQF_NO_SUSPEND
> > > flag for FSL ones. enable_irq_wake already returns -ENXIO if irq_set_wake
> > > is not implemented. Also there's no need to set the IRQF_NO_SUSPEND flag
> > > as it doesn't guarantee wakeup for that interrupt.
> > >
> 
> Non-freescale return -ENXIO, is there any issue? If non-freescale
> platform does not support it, but IPs still use
> enable/disable_irq_wake, we should return an error number.

You can just set IRQCHIP_SKIP_SET_WAKE for FSL chips and not for the
others.

> @Scott:
> If this flag is set, we cannot keep an irq as a wakeup source when the
> system is going to SUSPEND or MEM.
> 
> irq_set_wake() means we can set this irq as a wake source.
> IRQCHIP_SKIP_SET_WAKE ignores the irq_set_wake() feature.

Nonsense. IRQCHIP_SKIP_SET_WAKE merely tells the core not to bail on
!chip->irq_set_wake(), but it's still marking the interrupt as wakeup
source and therefore not masking it on suspend.

IRQF_NO_SUSPEND is the wrong tool. End of story.

Thanks,

tglx
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


linux-next: manual merge of the akpm tree with the drm-misc tree

2015-09-22 Thread Stephen Rothwell
Hi Andrew,

Today's linux-next merge of the akpm tree got a conflict in:

  drivers/gpu/drm/drm_irq.c

between commit:

  4e32087d8341 ("drm: Use vblank timestamps to guesstimate how many vblanks 
were missed")

from the drm-misc tree and patch:

   "Remove abs64()"

from the akpm tree.

I fixed it up (the former removed the instances of abs64 in that file)
and can carry the fix as necessary (no action is required).

-- 
Cheers,
Stephen Rothwell                    s...@canb.auug.org.au
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/2] ASoC: rockchip: i2s: add 8 channels capture and lrck-mode support

2015-09-22 Thread Sugar Zhang
support max 8 channels capture, please add property
'rockchip,capture-channels' in dts to enable this,
if not, support 2 channels capture default.

support lrck clk mode configuration, there are 3 modes:

 - txrx: lrck_tx and lrck_rx are different.
 - tx_share: lrck_tx is shared with lrck_rx.
 - rx_share: lrck_rx is shared with lrck_tx.

to enable this, please add property 'rockchip,lrck-mode' in dts,
if not, use 'txrx' lrck mode default.

Signed-off-by: Sugar Zhang 
---
 sound/soc/rockchip/rockchip_i2s.c | 48 +--
 sound/soc/rockchip/rockchip_i2s.h | 23 +++
 2 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/sound/soc/rockchip/rockchip_i2s.c 
b/sound/soc/rockchip/rockchip_i2s.c
index b936102..a8cb414 100644
--- a/sound/soc/rockchip/rockchip_i2s.c
+++ b/sound/soc/rockchip/rockchip_i2s.c
@@ -245,8 +245,34 @@ static int rockchip_i2s_hw_params(struct snd_pcm_substream 
*substream,
return -EINVAL;
}
 
-   regmap_update_bits(i2s->regmap, I2S_TXCR, I2S_TXCR_VDW_MASK, val);
-   regmap_update_bits(i2s->regmap, I2S_RXCR, I2S_RXCR_VDW_MASK, val);
+   switch (params_channels(params)) {
+   case 8:
+   val |= I2S_CHN_8;
+   break;
+   case 6:
+   val |= I2S_CHN_6;
+   break;
+   case 4:
+   val |= I2S_CHN_4;
+   break;
+   case 2:
+   val |= I2S_CHN_2;
+   break;
+   default:
+   dev_err(i2s->dev, "invalid channel: %d\n",
+   params_channels(params));
+   return -EINVAL;
+   }
+
+   if (substream->stream == SNDRV_PCM_STREAM_CAPTURE)
+   regmap_update_bits(i2s->regmap, I2S_RXCR,
+  I2S_RXCR_VDW_MASK | I2S_RXCR_CSR_MASK,
+  val);
+   else
+   regmap_update_bits(i2s->regmap, I2S_TXCR,
+  I2S_TXCR_VDW_MASK | I2S_TXCR_CSR_MASK,
+  val);
+
regmap_update_bits(i2s->regmap, I2S_DMACR, I2S_DMACR_TDL_MASK,
   I2S_DMACR_TDL(16));
regmap_update_bits(i2s->regmap, I2S_DMACR, I2S_DMACR_RDL_MASK,
@@ -415,10 +441,12 @@ static const struct regmap_config 
rockchip_i2s_regmap_config = {
 
 static int rockchip_i2s_probe(struct platform_device *pdev)
 {
+   struct device_node *node = pdev->dev.of_node;
struct rk_i2s_dev *i2s;
struct resource *res;
void __iomem *regs;
int ret;
+   int val;
 
i2s = devm_kzalloc(>dev, sizeof(*i2s), GFP_KERNEL);
if (!i2s) {
@@ -475,6 +503,22 @@ static int rockchip_i2s_probe(struct platform_device *pdev)
goto err_pm_disable;
}
 
+   /* refine capture channels */
+   if (!of_property_read_u32(node, "rockchip,capture-channels", )) {
+   if (val >= 2 && val <= 8)
+   rockchip_i2s_dai.capture.channels_max = val;
+   else
+   rockchip_i2s_dai.capture.channels_max = 2;
+   }
+
+   /* configure tx/rx lrck use mode */
+   if (!of_property_read_u32(node, "rockchip,lrck-mode", )) {
+   if (val >= LRCK_TXRX && val <= LRCK_RX_SHARE)
+   regmap_update_bits(i2s->regmap, I2S_CKR,
+  I2S_CKR_TRCM_MASK,
+  I2S_CKR_TRCM(val));
+   }
+
ret = devm_snd_soc_register_component(>dev,
  _i2s_component,
  _i2s_dai, 1);
diff --git a/sound/soc/rockchip/rockchip_i2s.h 
b/sound/soc/rockchip/rockchip_i2s.h
index 93f456f..0d285d1 100644
--- a/sound/soc/rockchip/rockchip_i2s.h
+++ b/sound/soc/rockchip/rockchip_i2s.h
@@ -49,6 +49,9 @@
  * RXCR
  * receive operation control register
 */
+#define I2S_RXCR_CSR_SHIFT 15
+#define I2S_RXCR_CSR(x)(x << I2S_RXCR_CSR_SHIFT)
+#define I2S_RXCR_CSR_MASK  (3 << I2S_RXCR_CSR_SHIFT)
 #define I2S_RXCR_HWT   BIT(14)
 #define I2S_RXCR_SJM_SHIFT 12
 #define I2S_RXCR_SJM_R (0 << I2S_RXCR_SJM_SHIFT)
@@ -75,6 +78,12 @@
  * CKR
  * clock generation register
 */
+#define I2S_CKR_TRCM_SHIFT 28
+#define I2S_CKR_TRCM(x)(x << I2S_CKR_TRCM_SHIFT)
+#define I2S_CKR_TRCM_TXRX  (0 << I2S_CKR_TRCM_SHIFT)
+#define I2S_CKR_TRCM_TXSHARE   (1 << I2S_CKR_TRCM_SHIFT)
+#define I2S_CKR_TRCM_RXSHARE   (2 << I2S_CKR_TRCM_SHIFT)
+#define I2S_CKR_TRCM_MASK  (3 << I2S_CKR_TRCM_SHIFT)
 #define I2S_CKR_MSS_SHIFT  27
 #define I2S_CKR_MSS_MASTER (0 << I2S_CKR_MSS_SHIFT)
 #define I2S_CKR_MSS_SLAVE  (1 << I2S_CKR_MSS_SHIFT)
@@ -207,6 +216,20 @@ enum {
ROCKCHIP_DIV_BCLK,
 };
 
+/* channel select */
+#define I2S_CSR_SHIFT  15
+#define I2S_CHN_2  (0 << I2S_CSR_SHIFT)
+#define I2S_CHN_4  (1 << I2S_CSR_SHIFT)
+#define I2S_CHN_6   

[PATCH 2/2] Documentation: DT bindings: rockchip-i2s: add capture and lrck-mode

2015-09-22 Thread Sugar Zhang
rockchip,capture-channels: max capture channels, 2 channels default.
rockchip,lrck-mode: 0: rxtx separate, 1: tx share, 2: rx share.
default use 'rxtx separate' mode.

Signed-off-by: Sugar Zhang 
---
 Documentation/devicetree/bindings/sound/rockchip-i2s.txt | 5 +
 1 file changed, 5 insertions(+)

diff --git a/Documentation/devicetree/bindings/sound/rockchip-i2s.txt 
b/Documentation/devicetree/bindings/sound/rockchip-i2s.txt
index 9b82c20..4066b85 100644
--- a/Documentation/devicetree/bindings/sound/rockchip-i2s.txt
+++ b/Documentation/devicetree/bindings/sound/rockchip-i2s.txt
@@ -21,6 +21,9 @@ Required properties:
 - clock-names: should contain followings:
- "i2s_hclk": clock for I2S BUS
- "i2s_clk" : clock for I2S controller
+- rockchip,capture-channels: max capture channels, if not set, 2 channels 
default.
+- rockchip,lrck-mode: select lrck use mode: 0: rxtx separate, 1: tx share, 2: 
rx share.
+  default use 'rxtx separate' mode.
 
 Example for rk3288 I2S controller:
 
@@ -34,4 +37,6 @@ i2s@ff89 {
dma-names = "tx", "rx";
clock-names = "i2s_hclk", "i2s_clk";
clocks = < HCLK_I2S0>, < SCLK_I2S0>;
+   rockchip,capture-channels = <2>;
+   rockchip,lrck-mode = <0>;
 };
-- 
2.3.6


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 0/2] ASoC: rockchip: i2s: add 8 channels capture and lrck-mode

2015-09-22 Thread Sugar Zhang
Support max 8 channels capture.
support lrck clk mode configuration, there are 3 modes:

 - txrx: lrck_tx and lrck_rx are different.
 - tx_share: lrck_tx is shared with lrck_rx.
 - rx_share: lrck_rx is shared with lrck_tx.

Sugar Zhang (2):
  ASoC: rockchip: i2s: add 8 channels capture and lrck-mode support
  Documentation: DT bindings: rockchip-i2s: add capture and lrck-mode

 .../devicetree/bindings/sound/rockchip-i2s.txt |  5 +++
 sound/soc/rockchip/rockchip_i2s.c  | 48 +-
 sound/soc/rockchip/rockchip_i2s.h  | 23 +++
 3 files changed, 74 insertions(+), 2 deletions(-)

-- 
2.3.6


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2] zbud: allow up to PAGE_SIZE allocations

2015-09-22 Thread Seth Jennings
On Tue, Sep 22, 2015 at 02:17:33PM +0200, Vitaly Wool wrote:
> Currently zbud is only capable of allocating not more than
> PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE. This is okay as
> long as only zswap is using it, but other users of zbud may
> (and likely will) want to allocate up to PAGE_SIZE. This patch
> addresses that by skipping the creation of zbud internal
> structure in the beginning of an allocated page (such pages are
> then called 'headless').

I guess I'm having trouble with this.  If you store a PAGE_SIZE
allocation in zbud, then the zpage can only have one allocation as there
is no room for a buddy.  So... we have an allocator for that: the
page allocator.

zbud doesn't support this by design because, if you are only storing one
allocation per page, you don't gain anything.

This functionality creates many new edge cases for the code.

What is this use case you envision?  I think we need to discuss
whether the use case exists and if it justifies the added complexity.

We are crossing a boundary into zsmalloc style complexity with storing
stuff in the struct page, something I really didn't want to do in zbud.

zbud is the simple one, zsmalloc is the complex one.  I'd hate to have
two complex ones :-/

Seth

> 
> As a zbud page is no longer guaranteed to contain zbud header, the
> following changes had to be applied throughout the code:
> * page->lru to be used for zbud page lists
> * page->private to hold 'under_reclaim' flag
> 
> page->private will also be used to indicate if this page contains
> a zbud header in the beginning or not ('headless' flag).
> 
> Signed-off-by: Vitaly Wool 
> ---
>  mm/zbud.c | 194 
> +-
>  1 file changed, 128 insertions(+), 66 deletions(-)
> 
> diff --git a/mm/zbud.c b/mm/zbud.c
> index fa48bcdf..7b51eb6 100644
> --- a/mm/zbud.c
> +++ b/mm/zbud.c
> @@ -105,18 +105,25 @@ struct zbud_pool {
>  
>  /*
>   * struct zbud_header - zbud page metadata occupying the first chunk of each
> - *   zbud page.
> + *   zbud page, except for HEADLESS pages
>   * @buddy:   links the zbud page into the unbuddied/buddied lists in the pool
> - * @lru: links the zbud page into the lru list in the pool
>   * @first_chunks:the size of the first buddy in chunks, 0 if free
>   * @last_chunks: the size of the last buddy in chunks, 0 if free
>   */
>  struct zbud_header {
>   struct list_head buddy;
> - struct list_head lru;
>   unsigned int first_chunks;
>   unsigned int last_chunks;
> - bool under_reclaim;
> +};
> +
> +/*
> + * struct zbud_page_priv - zbud flags to be stored in page->private
> + * @under_reclaim: if a zbud page is under reclaim
> + * @headless: indicates a page where zbud header didn't fit
> + */
> +struct zbud_page_priv {
> + bool under_reclaim:1;
> + bool headless:1;
>  };
>  
>  /*
> @@ -221,6 +228,7 @@ MODULE_ALIAS("zpool-zbud");
>  */
>  /* Just to make the code easier to read */
>  enum buddy {
> + HEADLESS,
>   FIRST,
>   LAST
>  };
> @@ -237,12 +245,15 @@ static int size_to_chunks(size_t size)
>  /* Initializes the zbud header of a newly allocated zbud page */
>  static struct zbud_header *init_zbud_page(struct page *page)
>  {
> + struct zbud_page_priv *ppriv = (struct zbud_page_priv *)page->private;
>   struct zbud_header *zhdr = page_address(page);
> +
> + INIT_LIST_HEAD(>lru);
> + ppriv->under_reclaim = 0;
> +
>   zhdr->first_chunks = 0;
>   zhdr->last_chunks = 0;
>   INIT_LIST_HEAD(>buddy);
> - INIT_LIST_HEAD(>lru);
> - zhdr->under_reclaim = 0;
>   return zhdr;
>  }
>  
> @@ -267,11 +278,22 @@ static unsigned long encode_handle(struct zbud_header 
> *zhdr, enum buddy bud)
>* over the zbud header in the first chunk.
>*/
>   handle = (unsigned long)zhdr;
> - if (bud == FIRST)
> + switch (bud) {
> + case FIRST:
>   /* skip over zbud header */
>   handle += ZHDR_SIZE_ALIGNED;
> - else /* bud == LAST */
> + break;
> + case LAST:
>   handle += PAGE_SIZE - (zhdr->last_chunks  << CHUNK_SHIFT);
> + break;
> + case HEADLESS:
> + break;
> + default:
> + /* this should never happen */
> + pr_err("zbud: invalid buddy value %d\n", bud);
> + handle = 0;
> + break;
> + }
>   return handle;
>  }
>  
> @@ -287,6 +309,7 @@ static int num_free_chunks(struct zbud_header *zhdr)
>   /*
>* Rather than branch for different situations, just use the fact that
>* free buddies have a length of zero to simplify everything.
> +  * NB: can't be used with HEADLESS pages.
>*/
>   return NCHUNKS - zhdr->first_chunks - zhdr->last_chunks;
>  }
> @@ -353,31 +376,40 @@ void zbud_destroy_pool(struct zbud_pool *pool)
>  int zbud_alloc(struct zbud_pool *pool, size_t size, 

linux-next: build warning after merge of the drm-misc tree

2015-09-22 Thread Stephen Rothwell
Hi all,

After merging the drm-misc tree, today's linux-next build (arm
multi_v7_defconfig) produced this warning:

drivers/gpu/drm/drm_crtc.c: In function 'drm_fb_release':
drivers/gpu/drm/drm_crtc.c:3494:21: warning: unused variable 'dev' 
[-Wunused-variable]
  struct drm_device *dev = priv->minor->dev;
 ^

Introduced by commit

  3d2e74c94432 ("drm/core: Preserve the fb id on close.")

-- 
Cheers,
Stephen Rothwell                    s...@canb.auug.org.au
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 2/3] usb: gadget: f_midi: free usb request when done

2015-09-22 Thread Peter Chen
On Tue, Sep 22, 2015 at 07:59:09PM +0100, Felipe F. Tonello wrote:
> req->actual == req->length means that there is no data left to enqueue,
> so free the request.
> 
> Signed-off-by: Felipe F. Tonello 
> ---
>  drivers/usb/gadget/function/f_midi.c | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/usb/gadget/function/f_midi.c 
> b/drivers/usb/gadget/function/f_midi.c
> index edb84ca..e92aff5 100644
> --- a/drivers/usb/gadget/function/f_midi.c
> +++ b/drivers/usb/gadget/function/f_midi.c
> @@ -258,7 +258,10 @@ f_midi_complete(struct usb_ep *ep, struct usb_request 
> *req)
>   } else if (ep == midi->in_ep) {
>   /* Our transmit completed. See if there's more to go.
>* f_midi_transmit eats req, don't queue it again. */
> - f_midi_transmit(midi, req);
> + if (req->actual < req->length)
> + f_midi_transmit(midi, req);
> + else
> + free_ep_req(ep, req);
>   return;
>   }

This is incorrect: if there is no request in the queue, how does the
device know when the host sends data?

-- 

Best Regards,
Peter Chen
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2 3/6] staging: fsl-mc: up-rev dpbp binary interface to v2.0

2015-09-22 Thread Greg KH
On Tue, Sep 22, 2015 at 06:08:56PM -0500, J. German Rivera wrote:
> Add cmd_flags parameter to all dpbp APIs to comply
> with the dpbp 2.0 MC interface. Updated MC version
> major number. Pass irq args in struct instead of
> separate args.
> 
> Signed-off-by: J. German Rivera 
> ---
>  drivers/staging/fsl-mc/bus/dpbp.c | 136 
> --
>  drivers/staging/fsl-mc/include/dpbp-cmd.h |   2 +-
>  drivers/staging/fsl-mc/include/dpbp.h |  91 +++-
>  3 files changed, 162 insertions(+), 67 deletions(-)
> 
> diff --git a/drivers/staging/fsl-mc/bus/dpbp.c 
> b/drivers/staging/fsl-mc/bus/dpbp.c
> index d99ab6d..0004e65 100644
> --- a/drivers/staging/fsl-mc/bus/dpbp.c
> +++ b/drivers/staging/fsl-mc/bus/dpbp.c
> @@ -34,14 +34,19 @@
>  #include "../include/dpbp.h"
>  #include "../include/dpbp-cmd.h"
> 
> -int dpbp_open(struct fsl_mc_io *mc_io, int dpbp_id, uint16_t *token)
> +int dpbp_open(struct fsl_mc_io *mc_io,
> +   uint32_t cmd_flags,
> +   int dpbp_id,
> +   uint16_t *token)
>  {
>   struct mc_command cmd = { 0 };
>   int err;
> 
>   /* prepare command */
>   cmd.header = mc_encode_cmd_header(DPBP_CMDID_OPEN,
> -   MC_CMD_PRI_LOW, 0);
> +   cmd_flags,
> +   0);

Why are you reformatting all of these calls when you don't have to
create a new line?  Seems wasteful to me, as is:

> +

That empty line, why?

thanks,

greg k-h
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC PATCH] PM / Runtime: runtime: Add sysfs option for forcing runtime suspend

2015-09-22 Thread Oliver Neukum
On Tue, 2015-09-22 at 11:22 -0400, Alan Stern wrote:
> On Tue, 22 Sep 2015, Oliver Neukum wrote:
>  
> > Cancel, yes, going to low power is a consequence which needn't bother
> > the power subsystem.
> 
> Going to low power needn't involve the power subsystem?  That sounds 
> weird.

Think of it like rfkill. It makes sense to suspend an rfkilled device.
It still is the job of the driver to report that its device is idle.

> >  You need a callback. If there are spurious
> > events, the current heuristics will keep devices awake.
> > You must discard them anyway, as they are spurious. There's no point
> > in transporting over the bus at all. We can cease IO for input.
> > 
> > > This would create a parallel runtime-PM mechanism which is independent
> > > of the existing one.  Is that really a good idea?
> > 
> > It isn't strictly PM. It helps PM to do a better job, but
> > conceptually it is independent.
> 
> So my next question is: _How_ can this help PM to do a better job?  
> That is, what are the mechanisms?

"inhibit" -> driver stops input -> driver sets PM count to zero
-> PM subsystem acts

To go from the first to the second step a callback is needed

> One you have already stated: Lack of spurious events will help prevent 
> unwanted wakeups (or unwanted failures to go to sleep).

That too. We also save CPU cycles.

> But Dmitry made a stronger claim: Inhibiting an input device should 
> allow the device to go to low power.  I would like to know how we can 
> implement this cleanly.  The most straightforward approach is to use 
> runtime PM, but it's not obvious how this can be made to work with the 
> current API.

Yes, we can use the current API.
The key is that you think of the mechanism as induced idleness,
not forced suspend. We already have a perfectly working mechanism
for suspending idle devices.

Regards
Oliver




--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2] perf record: Synthesize COMM event for a command line workload

2015-09-22 Thread Namhyung Kim
Hi Arnaldo,

On Tue, Sep 22, 2015 at 04:48:25PM -0300, Arnaldo Carvalho de Melo wrote:
> Em Tue, Sep 22, 2015 at 04:25:34PM -0300, Arnaldo Carvalho de Melo escreveu:
> > Em Tue, Sep 22, 2015 at 09:24:55AM +0900, Namhyung Kim escreveu:
> > > When perf creates a new child to profile, the events are enabled on
> > > exec().  And in this case, it doesn't synthesize any event for the
> > > child since they'll be generated during exec().  But there's a window
> > > between the enabling and the event generation.
> > > 
> > > It used to be overcome since samples are only in kernel (so we always
> > > have the map) and the comm is overridden by a later COMM event.
> > > However it won't work if events are processed and displayed before the
> > > COMM event overrides like in 'perf script'.  This leads to those early
> > > samples (like native_write_msr_safe) not having a comm but pid (like
> > > ':15328').
> > > 
> > > So it needs to synthesize COMM event for the child explicitly before
> > > enabling so that it can have a correct comm.  But at this time, the
> > > comm will be "perf" since it's not exec-ed yet.
> > 
> > Don't we use enable-on-exec?
> > 
> >   # perf record usleep 1
> >   [ perf record: Woken up 1 times to write data ]
> >   [ perf record: Captured and wrote 0.017 MB perf.data (7 samples) ]
> >   # perf evlist -v
> >   cycles: size: 112, { sample_period, sample_freq }: 4000, sample_type:
> >   IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1,
> >   enable_on_exec: 1, task: 1, sample_id_all: 1, exclude_guest: 1, mmap2:
> >   1, comm_exec: 1
> >   #
> > 
> > Ok, but even then, if we use --show-task-events, we see that the comm
> > appears only later...
> > 
> >   # perf script --show-task-events
> > :4429  4429 27909.079372:  1 cycles:  8105f45a 
> > native_write_msr_safe (/lib/modules/4.
> > :4429  4429 27909.079375:  1 cycles:  8105f45a 
> > native_write_msr_safe (/lib/modules/4.
> > :4429  4429 27909.079376: 10 cycles:  8105f45a 
> > native_write_msr_safe (/lib/modules/4.
> > :4429  4429 27909.079377:223 cycles:  8105f45a 
> > native_write_msr_safe (/lib/modules/4.
> > :4429  4429 27909.079378:   6571 cycles:  8105f45a 
> > native_write_msr_safe (/lib/modules/4.
> >usleep  4429 27909.079380: PERF_RECORD_COMM exec: usleep:4429/4429
> >usleep  4429 27909.079381: 185403 cycles:  810a72d3 
> > flush_signal_handlers (/lib/modules/4.
> >usleep  4429 27909.079444:2241110 cycles:  7fc575355be3 
> > _dl_start (/usr/lib64/ld-2.20.so)
> >usleep  4429 27909.079875: PERF_RECORD_EXIT(4429:4429):(4429:4429)
> > 
> > While with your patch we see:
> > 
> >   # perf record usleep 1
> >   [ perf record: Woken up 1 times to write data ]
> >   [ perf record: Captured and wrote 0.017 MB perf.data (7 samples) ]
> >   # perf script --show-task-events
> >  perf 0 0.00: PERF_RECORD_COMM: perf:6571/6571
> >  perf  6571 28378.609330:  1 cycles:  8105f45a 
> > native_write_msr_safe (/lib/modules/4.
> >  perf  6571 28378.609334:  1 cycles:  8105f45a 
> > native_write_msr_safe (/lib/modules/4.
> >  perf  6571 28378.609335:  7 cycles:  8105f45a 
> > native_write_msr_safe (/lib/modules/4.
> >  perf  6571 28378.609336:163 cycles:  8105f45a 
> > native_write_msr_safe (/lib/modules/4.
> >  perf  6571 28378.609337:   4795 cycles:  8105f45a 
> > native_write_msr_safe (/lib/modules/4.
> >  perf  6571 28378.609340: 119283 cycles:  81189bdd 
> > perf_event_comm_output (/lib/modules/4
> >usleep  6571 28378.609340: PERF_RECORD_COMM exec: usleep:6571/6571
> >usleep  6571 28378.609380:1900804 cycles:  810a0b40 
> > find_next_iomem_res (/lib/modules/4.3.
> >usleep  6571 28378.609879: PERF_RECORD_EXIT(6571:6571):(6571:6571)
> > 
> > Better indeed, I'm adding this before/after so that the changeset comment
> > can fully illustrate what happens.

OK

> > 
> > But you see there is still a problem with that synthesized COMM, right? the 
> > one
> > coming from the kernel has the 6571 there, while the synthesized one 
> > doesn't...
> > Checking...
> > 
> > But I'll apply your patch.

Thanks

> 
> Humm, I think it is working by accident, as you're not allocating space
> for machine->id_hdr_size, please take a look at
> perf_event__synthesize_thread_map().
> 
> Right now this line in perf_event__prepare_comm() is not a problem:
> 
> memset(event->comm.comm + size, 0, machine->id_hdr_size);
> 
> Because perf_event is a union and some of its elements, like mmap/mmap2,
> have that PATH_MAX part, but it's just a matter of the id_hdr_size
> becoming bigger than that and we'll have a problem...

Right. I'll send a fix to include the id_hdr part.


> 
> That zero there is probably because we're not synthesizing the
> PERF_SAMPLE_ part, that would 

Re: [PATCH v2] dax: fix NULL pointer in __dax_pmd_fault()

2015-09-22 Thread Dan Williams
On Tue, Sep 22, 2015 at 4:30 PM, Dave Chinner  wrote:
> On Tue, Sep 22, 2015 at 02:25:19PM -0700, Dan Williams wrote:
>> On Tue, Sep 22, 2015 at 2:13 PM, Andrew Morton
>>  wrote:
>> > On Tue, 22 Sep 2015 13:36:22 -0600 Ross Zwisler 
>> >  wrote:
>> >
>> >> The following commit:
>> >>
>> >> commit 46c043ede471 ("mm: take i_mmap_lock in unmap_mapping_range() for
>> >>   DAX")
>> >>
>> >> moved some code in __dax_pmd_fault() that was responsible for zeroing
>> >> newly allocated PMD pages.  The new location didn't properly set up
>> >> 'kaddr', though, so when run this code resulted in a NULL pointer BUG.
>> >>
>> >> Fix this by getting the correct 'kaddr' via bdev_direct_access().
>> >
>> > Why the heck didn't gcc warn?
>> >
>> > I had a fiddle:
>> >
>> > --- a/fs/dax.c~a
>> > +++ a/fs/dax.c
>> > @@ -529,15 +529,18 @@ int __dax_pmd_fault(struct vm_area_struc
>> > unsigned long pmd_addr = address & PMD_MASK;
>> > bool write = flags & FAULT_FLAG_WRITE;
>> > long length;
>> > -   void __pmem *kaddr;
>> > +   void *kaddr;
>> > pgoff_t size, pgoff;
>> > sector_t block, sector;
>> > unsigned long pfn;
>> > int result = 0;
>> >
>> > +// printk("%p\n", kaddr);
>> > +
>> > /* Fall back to PTEs if we're going to COW */
>> > if (write && !(vma->vm_flags & VM_SHARED))
>> > return VM_FAULT_FALLBACK;
>> > +   printk("%p\n", kaddr);
>> > /* If the PMD would extend outside the VMA */
>> > if (pmd_addr < vma->vm_start)
>> > return VM_FAULT_FALLBACK;
>> >
>> > gcc warns about the first printk, but not about the second.  So that
>> > "if (...) return ..." seems to have defeated gcc uninitialized-var
>> > detection.  wtf?
>> >
>> >> --- a/fs/dax.c
>> >> +++ b/fs/dax.c
>> >> @@ -569,8 +569,20 @@ int __dax_pmd_fault(struct vm_area_struct *vma, 
>> >> unsigned long address,
>> >>   if (!buffer_size_valid() || bh.b_size < PMD_SIZE)
>> >>   goto fallback;
>> >>
>> >> + sector = bh.b_blocknr << (blkbits - 9);
>> >> +
>> >>   if (buffer_unwritten() || buffer_new()) {
>> >>   int i;
>> >> +
>> >> + length = bdev_direct_access(bh.b_bdev, sector, , ,
>> >> + bh.b_size);
>> >> + if (length < 0) {
>> >> + result = VM_FAULT_SIGBUS;
>> >> + goto out;
>> >> + }
>> >> + if ((length < PMD_SIZE) || (pfn & PG_PMD_COLOUR))
>> >> + goto fallback;
>> >> +
>> >>   for (i = 0; i < PTRS_PER_PMD; i++)
>> >>   clear_pmem(kaddr + i * PAGE_SIZE, PAGE_SIZE);
>> >>   wmb_pmem();
>> >
>> > hm, that's a lot of copy-n-paste.  Do we really need to run
>> > bdev_direct_access() twice?  Will `kaddr' and `pfn' change?
>> >
>>
>> They shouldn't change, but I'm working on a fix for handling the race
>> of unbinding the pmem device while that kaddr is in use (unbind
>> invalidates kaddr).
>
> Exactly what does "unbinding the pmem device" mean,

echo namespace0.0 > /sys/bus/nd/drivers/nd_pmem/unbind

> and why can
> (parts of) the pmem device "go away" when there are active
> references to it?

Normally we have outstanding i/o requests to hold off
blk_cleanup_queue(), but in the dax case we don't have any mechanism
(yet) to flag the queue as busy.  I have some patches to add a
percpu_refcount for this purpose.

>
>> The proposal is a dax_map_bh()/dax_unmap_bh()
>> interface to temporarily pin the mapping around each usage.
>
> Which mapping? The bufferhead maps file offset to filesystem block
> addresses, so I'm not sure what problem you are actually refering
> to here...

The kaddr is coming from the devm_memremap() in the pmem driver that
gets unmapped after the device is released by the driver.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] perf probe: Fix module probing with shortname

2015-09-22 Thread Arnaldo Carvalho de Melo
Em Wed, Sep 23, 2015 at 09:14:44AM +0800, Wangnan (F) escreveu:
> 
> 
> On 2015/9/22 21:35, Arnaldo Carvalho de Melo wrote:
> >Em Tue, Sep 22, 2015 at 03:34:32AM +, Wang Nan escreveu:
> >>After commit 3d39ac538629e4f00a6e1c38d46346f1b8e69505 ("perf machine:
> >>No need to have two DSOs lists"), perf probe with module short name doesn't
> >>work again. For example:
> >>
> >>  # lsmod | grep e1000e
> >>  e1000e233472  0
> >>
> >>  # cat /proc/modules | grep e1000e
> >>  e1000e 233472 0 - Live 0xa0073000
> >>
> >>  # cat /proc/kallsyms | grep '\'
> >>  a0093860 t e1000e_up[e1000e]
> >>
> >>  # perf probe -v -m e1000e --add e1000e_up
> >>  probe-definition(0): e1000e_up
> >>  symbol:e1000e_up file:(null) line:0 offset:0 return:0 lazy:(null)
> >>  0 arguments
> >>  Failed to find module e1000e.
> >>  Could not open debuginfo. Try to use symbols.
> >>  Looking at the vmlinux_path (7 entries long)
> >>  Using /lib/modules/4.2.0-rc7+/build/vmlinux for symbols
> >>  e1000e_up is out of .text, skip it.
> >>Error: Failed to add events. Reason: No such file or directory (Code: 
> >> -2)
> >>
> >>This is caused by a misunderstanding of dso->kernel in
> >>kernel_get_module_dso(): for a kernel module, dso->kernel is
> >>DSO_TYPE_USER. dso->kernel is DSO_TYPE_KERNEL
> >>iff the dso is vmlinux.
> >Kernel modules having DSO_TYPE_USER seems to be the bug, no? I'll try to
> >check that...
> 
> I also noticed this problem when I was working on commit
> 1f121b03d058dd07199d8924373d3c52a207f63b ("perf tools: Deal with
> kernel module names in '[]' correctly") ;)

Thanks for working on this, it is an area that needs cleaning up, too
many ways to say what a dso is, will study your findings and try to come
up with a patch proposal tomorrow.

- Arnaldo
 
> It should be a bug, but I think fixing it is costly. There is an
> assumption that, if dso->kernel
> is not zero, the dso should be vmlinux (not a kernel module):
> 
> $ grep 'dso.>kernel)' ./tools/perf/ -r
> ./tools/perf/builtin-inject.c:if (dso->kernel)
> ./tools/perf/util/symbol.c:if (dso->kernel) {
> ./tools/perf/util/symbol-elf.c:if (dso->kernel)
> ./tools/perf/util/symbol-elf.c:if (remap_kernel &&
> dso->kernel) {
> ./tools/perf/util/event.c:if (pos->dso->kernel)
> ./tools/perf/util/probe-event.c:if (dso->kernel)
> ./tools/perf/util/map.c: * map->dso->kernel) before calling
> __map__is_{kernel,kmodule}())
> ./tools/perf/util/map.c:if (!map->dso || !map->dso->kernel) {
> ./tools/perf/builtin-top.c:if (!map->dso->kernel)
> 
> So care must be taken.
> 
> Another solution seems simpler: we can redefine the meaning of enum
> dso_kernel_type like this:
> 
> # find  ./tools/perf/ -type f | xargs -n1 sed -i
> 's/DSO_TYPE_USER/DSO_TYPE_NOT_VMLINUX/g'
> # find  ./tools/perf/ -type f | xargs -n1 sed -i
> 's/DSO_TYPE_KERNEL/DSO_TYPE_VMLINUX/g'
> # find  ./tools/perf/ -type f | xargs -n1 sed -i
> 's/DSO_TYPE_GUEST_KERNEL/DSO_TYPE_GUEST_VMLINUX/g'
> 
> By fixing the name of DSO_TYPE_USER, kernel module with
> DSO_TYPE_NOT_VMLINUX seems
> not so buggy. (Please choose a better name...)
> 
> What's your opinion?
> 
> Thank you.
> 
> >- Arnaldo
> >
> >>This patch fixes 'perf probe -m' in an ad-hoc way.
> >>
> >>After this patch:
> >>
> >>  # perf probe -v -m e1000e --add e1000e_up
> >>  probe-definition(0): e1000e_up
> >>  symbol:e1000e_up file:(null) line:0 offset:0 return:0 lazy:(null)
> >>  0 arguments
> >>  Open Debuginfo file: 
> >> /lib/modules/4.2.0-rc7+/kernel/drivers/net/ethernet/intel/e1000e/e1000e.ko
> >>  Try to find probe point from debuginfo.
> >>  Matched function: e1000e_up
> >>  Probe point found: e1000e_up+0
> >>  Found 1 probe_trace_events.
> >>  Opening /sys/kernel/debug/tracing//kprobe_events write=1
> >>  Writing event: p:probe/e1000e_up e1000e:e1000e_up+0
> >>  Added new event:
> >>probe:e1000e_up  (on e1000e_up in e1000e)
> >>
> >>  You can now use it in all perf tools, such as:
> >>
> >>perf record -e probe:e1000e_up -aR sleep 1
> >>
> >>  # perf probe -l
> >>  Failed to find debug information for address a0093860
> >>probe:e1000e_up  (on e1000e_up in e1000e)
> >>
> >>Signed-off-by: Wang Nan 
> >>Cc: Arnaldo Carvalho de Melo 
> >>Cc: Namhyung Kim 
> >>Cc: Jiri Olsa 
> >>Cc: Masami Hiramatsu 
> >>---
> >>
> >>I think there may be other places where dso->kernel is misused.
> >>machine__process_kernel_mmap_event() may be one of them. If I understand
> >>correctly, 'dso->kernel && is_kernel_module(dso->long_name)' should always
> >>be false theoretically. However, I don't have enough time to check whether that
> >>code really causes a problem.
> >>
> >>---
> >>  tools/perf/util/probe-event.c | 2 +-
> >>  1 file changed, 1 insertion(+), 1 deletion(-)
> >>
> >>diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
> >>index 2b78e8f..c7d6d3d 100644
> >>--- a/tools/perf/util/probe-event.c
> >>+++ b/tools/perf/util/probe-event.c
> >>@@ -270,7 +270,7 

Re: [PATCH] kexec: Add prefix "kexec" to output message

2015-09-22 Thread Baoquan He
On 09/23/15 at 09:37am, Dave Young wrote:
> > > Hi, Dave.
> > > 
> > > How about removing all of the prefix "crashkernel" in kexec_core. Thus
> > > we can be consistent with the output message prefix "kexec".
> > 
> > Ping, any comment is appreciate and helpful.
> 
> Remove "crashkernel" sounds not a proper way, it indicates crashkernel parsing
> messages. I have no idea what is the best way but below modification sounds 
> better to me:
> 
> kexec_core.c:
> 
> #define pr_fmt(fmt)"[kexec_core] " fmt
> Also remove below prefix "Kexec:"
> pr_warn("Kexec: Memory allocation for saving cpu register states failed\n"); 
> 
> kexec.c:
> #define pr_fmt(fmt)"[kexec] " fmt
> 
> kexec_file.c:
> #define pr_fmt(fmt)"[kexec_file] " fmt

This is weird; users really don't need to know about each file. I saw you added
a new file kexec_internal.h and all three files include it. Why not do
it there to keep it the same as before?

> 
> Thanks
> Dave
> 
> ___
> kexec mailing list
> ke...@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/kexec
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: How to ensure that different peripherals getting different IOVA address in kernel?

2015-09-22 Thread Jiang Liu
On 2015/9/23 9:55, chenfeng wrote:
> Hi all,
> 
> In IOMMU architecture , how to make two different peripherals share the same 
> page table ?
> 
> In other words , is there a mechanism or structure to make two peripherals 
> get completely different address.
> 
> eg:
> 
> peri-A、peri-B and peri-C share the same iova address 0-1G for some 
> performance requests.
> 
> So the A,B,C need to use the same IOVA generator to ensure this,but I don't 
> find an architecture to make this.
> 
> Any help will be appreciated.
Hi Chenfeng,
Seems IOMMU group may help you, please take a look at
Documentation/vfio.txt.
Thanks!
Gerry
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] memcg: remove pcp_counter_lock

2015-09-22 Thread Greg Thelen
Commit 733a572e66d2 ("memcg: make mem_cgroup_read_{stat|event}() iterate
possible cpus instead of online") removed the last use of the per memcg
pcp_counter_lock but forgot to remove the variable.

Kill the vestigial variable.

Signed-off-by: Greg Thelen 
---
 include/linux/memcontrol.h | 1 -
 mm/memcontrol.c| 1 -
 2 files changed, 2 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index ad800e62cb7a..6452ff4c463f 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -242,7 +242,6 @@ struct mem_cgroup {
 * percpu counter.
 */
struct mem_cgroup_stat_cpu __percpu *stat;
-   spinlock_t pcp_counter_lock;
 
 #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
struct cg_proto tcp_mem;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6ddaeba34e09..da21143550c0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4179,7 +4179,6 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
if (memcg_wb_domain_init(memcg, GFP_KERNEL))
goto out_free_stat;
 
-   spin_lock_init(>pcp_counter_lock);
return memcg;
 
 out_free_stat:
-- 
2.6.0.rc0.131.gf624c3d

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RE: [PATCH 04/17] powerpc: mpic: use IRQCHIP_SKIP_SET_WAKE instead of redundant mpic_irq_set_wake

2015-09-22 Thread Wang Dongsheng


> -Original Message-
> From: Wood Scott-B07421
> Sent: Wednesday, September 23, 2015 7:50 AM
> To: Sudeep Holla
> Cc: linux...@vger.kernel.org; linux-kernel@vger.kernel.org; Thomas Gleixner;
> Rafael J. Wysocki; Benjamin Herrenschmidt; Paul Mackerras; Michael Ellerman; 
> Jia
> Hongtao-B38951; Marc Zyngier; linuxppc-...@lists.ozlabs.org; Wang Dongsheng-
> B40534
> Subject: Re: [PATCH 04/17] powerpc: mpic: use IRQCHIP_SKIP_SET_WAKE instead of
> redundant mpic_irq_set_wake
> 
> On Mon, 2015-09-21 at 16:47 +0100, Sudeep Holla wrote:
> > mpic_irq_set_wake return -ENXIO for non FSL MPIC and sets IRQF_NO_SUSPEND
> > flag for FSL ones. enable_irq_wake already returns -ENXIO if irq_set_wake
> > is not implemented. Also there's no need to set the IRQF_NO_SUSPEND flag
> > as it doesn't guarantee wakeup for that interrupt.
> >

Non-Freescale platforms return -ENXIO; is there any issue with that? If a
non-Freescale platform does not support it, but IPs still use
enable/disable_irq_wake, we should return an error number.

IRQCHIP_SKIP_SET_WAKE just skips this feature, which is not what we expect.
If a non-Freescale platform needs this flag to skip this feature, it should be
added in that platform's own code.

@Scott:
If we set this flag, we cannot keep an irq as a wakeup source when the system
is going to SUSPEND or MEM.

irq_set_wake() means we can set this irq as a wake source.
IRQCHIP_SKIP_SET_WAKE ignores the irq_set_wake() feature.

Regards,
-Dongsheng



Re: [RFC v7 18/41] ext4: Add richacl support

2015-09-22 Thread Aneesh Kumar K.V
Andreas Gruenbacher  writes:

> From: "Aneesh Kumar K.V" 
>
> Support the richacl permission model in ext4.  The richacls are stored
> in "system.richacl" xattrs.  Richacls need to be enabled by tune2fs or
> at file system create time.
>

 Signed-off-by:  Aneesh Kumar K.V 

> Signed-off-by: Andreas Gruenbacher 
> ---
>  fs/ext4/Kconfig   |  15 
>  fs/ext4/Makefile  |   1 +
>  fs/ext4/acl.c |   6 +-
>  fs/ext4/acl.h |  12 +--
>  fs/ext4/file.c|   6 +-
>  fs/ext4/ialloc.c  |   7 +-
>  fs/ext4/inode.c   |  10 ++-
>  fs/ext4/namei.c   |  11 ++-
>  fs/ext4/richacl.c | 213 
> ++
>  fs/ext4/richacl.h |  47 
>  fs/ext4/xattr.c   |   6 ++
>  fs/ext4/xattr.h   |   1 +
>  12 files changed, 316 insertions(+), 19 deletions(-)
>  create mode 100644 fs/ext4/richacl.c
>  create mode 100644 fs/ext4/richacl.h
>
> diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
> index bf8bc8a..77a386d 100644
> --- a/fs/ext4/Kconfig
> +++ b/fs/ext4/Kconfig
> @@ -96,3 +96,18 @@ config EXT4_DEBUG
> If you select Y here, then you will be able to turn on debugging
> with a command such as:
>   echo 1 > /sys/module/ext4/parameters/mballoc_debug
> +
> +config EXT4_FS_RICHACL
> + bool "Ext4 Rich Access Control Lists (EXPERIMENTAL)"
> + depends on EXT4_FS
> + select FS_RICHACL
> + help
> +   Rich ACLs are an implementation of NFSv4 ACLs, extended by file masks
> +   to fit into the standard POSIX file permission model.  They are
> +   designed to work seamlessly locally as well as across the NFSv4 and
> +   CIFS/SMB2 network file system protocols.
> +
> +   To learn more about Rich ACL, visit
> +   http://acl.bestbits.at/richacl/
> +
> +   If you don't know what Rich ACLs are, say N
> diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
> index 75285ea..ea0d539 100644
> --- a/fs/ext4/Makefile
> +++ b/fs/ext4/Makefile
> @@ -14,3 +14,4 @@ ext4-$(CONFIG_EXT4_FS_POSIX_ACL)+= acl.o
>  ext4-$(CONFIG_EXT4_FS_SECURITY)  += xattr_security.o
>  ext4-$(CONFIG_EXT4_FS_ENCRYPTION)+= crypto_policy.o crypto.o \
>   crypto_key.o crypto_fname.o
> +ext4-$(CONFIG_EXT4_FS_RICHACL)   += richacl.o
> diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
> index 69b1e73..d965fa6 100644
> --- a/fs/ext4/acl.c
> +++ b/fs/ext4/acl.c
> @@ -140,7 +140,7 @@ fail:
>   * inode->i_mutex: don't care
>   */
>  struct posix_acl *
> -ext4_get_acl(struct inode *inode, int type)
> +ext4_get_posix_acl(struct inode *inode, int type)
>  {
>   int name_index;
>   char *value = NULL;
> @@ -234,7 +234,7 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int 
> type,
>  }
>
>  int
> -ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type)
> +ext4_set_posix_acl(struct inode *inode, struct posix_acl *acl, int type)
>  {
>   handle_t *handle;
>   int error, retries = 0;
> @@ -259,7 +259,7 @@ retry:
>   * inode->i_mutex: up (access to inode is still exclusive)
>   */
>  int
> -ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
> +ext4_init_posix_acl(handle_t *handle, struct inode *inode, struct inode *dir)
>  {
>   struct posix_acl *default_acl, *acl;
>   int error;
> diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h
> index da2c795..450b4d1 100644
> --- a/fs/ext4/acl.h
> +++ b/fs/ext4/acl.h
> @@ -54,17 +54,17 @@ static inline int ext4_acl_count(size_t size)
>  #ifdef CONFIG_EXT4_FS_POSIX_ACL
>
>  /* acl.c */
> -struct posix_acl *ext4_get_acl(struct inode *inode, int type);
> -int ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type);
> -extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
> +struct posix_acl *ext4_get_posix_acl(struct inode *inode, int type);
> +int ext4_set_posix_acl(struct inode *inode, struct posix_acl *acl, int type);
> +extern int ext4_init_posix_acl(handle_t *, struct inode *, struct inode *);
>
>  #else  /* CONFIG_EXT4_FS_POSIX_ACL */
>  #include 
> -#define ext4_get_acl NULL
> -#define ext4_set_acl NULL
> +#define ext4_get_posix_acl NULL
> +#define ext4_set_posix_acl NULL
>
>  static inline int
> -ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir)
> +ext4_init_posix_acl(handle_t *handle, struct inode *inode, struct inode *dir)
>  {
>   return 0;
>  }
> diff --git a/fs/ext4/file.c b/fs/ext4/file.c
> index bc313ac..3d3fcc8 100644
> --- a/fs/ext4/file.c
> +++ b/fs/ext4/file.c
> @@ -29,6 +29,7 @@
>  #include "ext4_jbd2.h"
>  #include "xattr.h"
>  #include "acl.h"
> +#include "richacl.h"
>
>  /*
>   * Called when an inode is released. Note that this is different
> @@ -659,8 +660,9 @@ const struct inode_operations ext4_file_inode_operations 
> = {
>   .getxattr   = generic_getxattr,
>   .listxattr  = ext4_listxattr,
>   .removexattr= generic_removexattr,
> - .get_acl= ext4_get_acl,
> - .set_acl= ext4_set_acl,
> + .get_acl= 

Re: [RFC v7 19/41] ext4: Add richacl feature flag

2015-09-22 Thread Aneesh Kumar K.V
Andreas Gruenbacher  writes:

> From: "Aneesh Kumar K.V" 
>
> This feature flag selects richacl instead of posix acl support on the
> file system. In addition, the "acl" mount option is needed for enabling
> either of the two kinds of acls.
>
> Signed-off-by: Andreas Gruenbacher 

 Signed-off-by: Aneesh Kumar K.V 

> ---
>  fs/ext4/ext4.h  |  6 --
>  fs/ext4/super.c | 42 +-
>  2 files changed, 37 insertions(+), 11 deletions(-)
>
> diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
> index f5e9f04..e69c8ea 100644
> --- a/fs/ext4/ext4.h
> +++ b/fs/ext4/ext4.h
> @@ -991,7 +991,7 @@ struct ext4_inode_info {
>  #define EXT4_MOUNT_UPDATE_JOURNAL0x01000 /* Update the journal format */
>  #define EXT4_MOUNT_NO_UID32  0x02000  /* Disable 32-bit UIDs */
>  #define EXT4_MOUNT_XATTR_USER0x04000 /* Extended user 
> attributes */
> -#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */
> +#define EXT4_MOUNT_ACL   0x08000 /* Access Control Lists 
> */
>  #define EXT4_MOUNT_NO_AUTO_DA_ALLOC  0x1 /* No auto delalloc mapping */
>  #define EXT4_MOUNT_BARRIER   0x2 /* Use block barriers */
>  #define EXT4_MOUNT_QUOTA 0x8 /* Some quota option set */
> @@ -1582,6 +1582,7 @@ static inline int ext4_encrypted_inode(struct inode 
> *inode)
>  #define EXT4_FEATURE_INCOMPAT_LARGEDIR   0x4000 /* >2GB or 3-lvl 
> htree */
>  #define EXT4_FEATURE_INCOMPAT_INLINE_DATA0x8000 /* data in inode */
>  #define EXT4_FEATURE_INCOMPAT_ENCRYPT0x1
> +#define EXT4_FEATURE_INCOMPAT_RICHACL0x2
>
>  #define EXT2_FEATURE_COMPAT_SUPP EXT4_FEATURE_COMPAT_EXT_ATTR
>  #define EXT2_FEATURE_INCOMPAT_SUPP   (EXT4_FEATURE_INCOMPAT_FILETYPE| \
> @@ -1607,7 +1608,8 @@ static inline int ext4_encrypted_inode(struct inode 
> *inode)
>EXT4_FEATURE_INCOMPAT_FLEX_BG| \
>EXT4_FEATURE_INCOMPAT_MMP | \
>EXT4_FEATURE_INCOMPAT_INLINE_DATA | \
> -  EXT4_FEATURE_INCOMPAT_ENCRYPT)
> +  EXT4_FEATURE_INCOMPAT_ENCRYPT | \
> +  EXT4_FEATURE_INCOMPAT_RICHACL)
>  #define EXT4_FEATURE_RO_COMPAT_SUPP  (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \
>EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \
>EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
> diff --git a/fs/ext4/super.c b/fs/ext4/super.c
> index 58987b5..05d6537 100644
> --- a/fs/ext4/super.c
> +++ b/fs/ext4/super.c
> @@ -1257,6 +1257,28 @@ static ext4_fsblk_t get_sb_block(void **data)
>   return sb_block;
>  }
>
> +static int enable_acl(struct super_block *sb)
> +{
> + sb->s_flags &= ~(MS_POSIXACL | MS_RICHACL);
> + if (test_opt(sb, ACL)) {
> + if (EXT4_HAS_INCOMPAT_FEATURE(sb,
> +   EXT4_FEATURE_INCOMPAT_RICHACL)) {
> +#ifdef CONFIG_EXT4_FS_RICHACL
> + sb->s_flags |= MS_RICHACL;
> +#else
> + return -EOPNOTSUPP;
> +#endif
> + } else {
> +#ifdef CONFIG_EXT4_FS_POSIX_ACL
> + sb->s_flags |= MS_POSIXACL;
> +#else
> + return -EOPNOTSUPP;
> +#endif
> + }
> + }
> + return 0;
> +}
> +
>  #define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
>  static char deprecated_msg[] = "Mount option \"%s\" will be removed by %s\n"
>   "Contact linux-e...@vger.kernel.org if you think we should keep it.\n";
> @@ -1403,9 +1425,9 @@ static const struct mount_opts {
>MOPT_NO_EXT2 | MOPT_DATAJ},
>   {Opt_user_xattr, EXT4_MOUNT_XATTR_USER, MOPT_SET},
>   {Opt_nouser_xattr, EXT4_MOUNT_XATTR_USER, MOPT_CLEAR},
> -#ifdef CONFIG_EXT4_FS_POSIX_ACL
> - {Opt_acl, EXT4_MOUNT_POSIX_ACL, MOPT_SET},
> - {Opt_noacl, EXT4_MOUNT_POSIX_ACL, MOPT_CLEAR},
> +#if defined(CONFIG_EXT4_FS_POSIX_ACL) || defined(CONFIG_EXT4_FS_RICHACL)
> + {Opt_acl, EXT4_MOUNT_ACL, MOPT_SET},
> + {Opt_noacl, EXT4_MOUNT_ACL, MOPT_CLEAR},
>  #else
>   {Opt_acl, 0, MOPT_NOSUPPORT},
>   {Opt_noacl, 0, MOPT_NOSUPPORT},
> @@ -3563,8 +3585,8 @@ static int ext4_fill_super(struct super_block *sb, void 
> *data, int silent)
>   set_opt(sb, NO_UID32);
>   /* xattr user namespace & acls are now defaulted on */
>   set_opt(sb, XATTR_USER);
> -#ifdef CONFIG_EXT4_FS_POSIX_ACL
> - set_opt(sb, POSIX_ACL);
> +#if defined(CONFIG_EXT4_FS_POSIX_ACL) || defined(CONFIG_EXT4_FS_RICHACL)
> + set_opt(sb, ACL);
>  #endif
>   /* don't forget to enable journal_csum when metadata_csum is enabled. */
>   if (ext4_has_metadata_csum(sb))
> @@ -3645,8 +3667,9 @@ static int ext4_fill_super(struct super_block *sb, void 
> *data, int silent)
>   clear_opt(sb, DELALLOC);
>   

RE: [PATCH v10 3/5] CPM/QE: use genalloc to manage CPM/QE muram

2015-09-22 Thread Zhao Qiang
On Wed, Sep 23, 2015 at 8:19 AM +0800, Wood Scott-B07421 wrote:

> -Original Message-
> From: Wood Scott-B07421
> Sent: Wednesday, September 23, 2015 8:19 AM
> To: Zhao Qiang-B45475
> Cc: linux-kernel@vger.kernel.org; linuxppc-...@lists.ozlabs.org;
> lau...@codeaurora.org; Xie Xiaobo-R63061; b...@kernel.crashing.org; Li
> Yang-Leo-R58472; pau...@samba.org
> Subject: Re: [PATCH v10 3/5] CPM/QE: use genalloc to manage CPM/QE muram
> 
> On Tue, 2015-09-22 at 03:10 -0500, Zhao Qiang-B45475 wrote:
> > On Tue, Sep 22, 2015 at 06:47 AM +0800, Wood Scott-B07421 wrote:
> > > -Original Message-
> > > From: Wood Scott-B07421
> > > Sent: Tuesday, September 22, 2015 6:47 AM
> > > To: Zhao Qiang-B45475
> > > Cc: linux-kernel@vger.kernel.org; linuxppc-...@lists.ozlabs.org;
> > > lau...@codeaurora.org; Xie Xiaobo-R63061; b...@kernel.crashing.org;
> > > Li Yang-Leo-R58472; pau...@samba.org
> > > Subject: Re: [PATCH v10 3/5] CPM/QE: use genalloc to manage CPM/QE
> > > muram
> > >
> > > On Fri, Sep 18, 2015 at 03:15:19PM +0800, Zhao Qiang wrote:
> > > > Use genalloc to manage CPM/QE muram instead of rheap.
> > > >
> > > > Signed-off-by: Zhao Qiang 
> > > > ---
> > > > Changes for v9:
> > > >   - splitted from patch 3/5, modify cpm muram management functions.
> > > > Changes for v10:
> > > >   - modify cpm muram first, then move to qe_common
> > > >   - modify commit.
> > > >
> > > >  arch/powerpc/platforms/Kconfig   |   1 +
> > > >  arch/powerpc/sysdev/cpm_common.c | 150
> > > > +++
> > > >  2 files changed, 107 insertions(+), 44 deletions(-)
> > > >
> > > > diff --git a/arch/powerpc/platforms/Kconfig
> > > > b/arch/powerpc/platforms/Kconfig index b7f9c40..01f98a2 100644
> > > > --- a/arch/powerpc/platforms/Kconfig
> > > > +++ b/arch/powerpc/platforms/Kconfig
> > > > @@ -276,6 +276,7 @@ config QUICC_ENGINE
> > > >   bool "Freescale QUICC Engine (QE) Support"
> > > >   depends on FSL_SOC && PPC32
> > > >   select PPC_LIB_RHEAP
> > > > + select GENERIC_ALLOCATOR
> > > >   select CRC32
> > > >   help
> > > > The QUICC Engine (QE) is a new generation of communications
> > > > diff --git a/arch/powerpc/sysdev/cpm_common.c
> > > > b/arch/powerpc/sysdev/cpm_common.c
> > > > index 4f78695..453d18c 100644
> > > > --- a/arch/powerpc/sysdev/cpm_common.c
> > > > +++ b/arch/powerpc/sysdev/cpm_common.c
> > > > @@ -17,6 +17,7 @@
> > > >   * published by the Free Software Foundation.
> > > >   */
> > > >
> > > > +#include 
> > > >  #include 
> > > >  #include 
> > > >  #include 
> > > > @@ -27,7 +28,6 @@
> > > >
> > > >  #include 
> > > >  #include 
> > > > -#include 
> > > >  #include 
> > > >
> > > >  #include 
> > > > @@ -65,14 +65,24 @@ void __init udbg_init_cpm(void)  }  #endif
> > > >
> > > > +static struct gen_pool *muram_pool; static struct
> > > > +genpool_data_align muram_pool_data; static struct
> > > > +genpool_data_fixed muram_pool_data_fixed;
> > >
> > > Why are these global?  If you keep the data local to the caller (and
> > > use
> > > gen_pool_alloc_data()) then you probably don't need cpm_muram_lock.
> >
> > Ok
> >
> > >
> > > >  static spinlock_t cpm_muram_lock; -static rh_block_t
> > > > cpm_boot_muram_rh_block[16]; -static rh_info_t cpm_muram_info;
> > > > static u8 __iomem *muram_vbase;  static phys_addr_t muram_pbase;
> > > >
> > > > -/* Max address size we deal with */
> > > > +struct muram_block {
> > > > + struct list_head head;
> > > > + unsigned long start;
> > > > + int size;
> > > > +};
> > > > +
> > > > +static LIST_HEAD(muram_block_list);
> > > > +
> > > > +/* max address size we deal with */
> > > >  #define OF_MAX_ADDR_CELLS4
> > > > +#define GENPOOL_OFFSET   4096
> > >
> > > Is 4096 bytes the maximum alignment you'll ever need?  Wouldn't it
> > > be safer to use a much larger offset?
> >
> > Yes, 4096 is the maximum alignment I ever need.
> 
> Still, I'd be more comfortable with a larger offset.

Larger offset is good.

> 
> Better yet would be using gen_pool_add_virt() and using virtual addresses
> for the allocations, similar to http://patchwork.ozlabs.org/patch/504000/
> 
> > > > int cpm_muram_init(void)
> > > >  {
> > > > @@ -86,113 +96,165 @@ int cpm_muram_init(void)
> > > >   if (muram_pbase)
> > > >   return 0;
> > > >
> > > > - spin_lock_init(_muram_lock);
> > >
> > > Why are you eliminating the lock init, given that you're not getting
> > > rid of the lock?
> > >
> > > > - /* initialize the info header */
> > > > - rh_init(_muram_info, 1,
> > > > - sizeof(cpm_boot_muram_rh_block) /
> > > > - sizeof(cpm_boot_muram_rh_block[0]),
> > > > - cpm_boot_muram_rh_block);
> > > > -
> > > >   np = of_find_compatible_node(NULL, NULL, "fsl,cpm-muram-data");
> > > >   if (!np) {
> > > >   /* try legacy bindings */
> > > >   np = of_find_node_by_name(NULL, "data-only");
> > > >   if (!np) {
> > > > - printk(KERN_ERR "Cannot find CPM muram data
> node");
> > > > +

[PATCH 3/9] tools build: Fixup feature detection display function name

2015-09-22 Thread Arnaldo Carvalho de Melo
From: Arnaldo Carvalho de Melo 

Cut'n'paste mistake, it should eval the name of the function
defined right next to it, in the next line, fix it.

Before:

  $ make -C tools/lib/bpf/
  make: Entering directory '/home/git/linux/tools/lib/bpf'

  Auto-detecting system features:
  ...libelf: [ on  ]
  ... libelf-getphdrnum: [ on  ]
  ...   libelf-mmap: [ on  ]
  ...   bpf: [ on  ]
  

After:

  $ make -C tools/lib/bpf/
  make: Entering directory '/home/git/linux/tools/lib/bpf'

  Auto-detecting system features:
  ...libelf: [ on  ]
  ... libelf-getphdrnum: [ OFF ]
  ...   libelf-mmap: [ OFF ]
  ...   bpf: [ on  ]
  

Acked-by: Jiri Olsa 
Cc: Adrian Hunter 
Cc: Alexei Starovoitov 
Cc: Borislav Petkov 
Cc: David Ahern 
Cc: Frederic Weisbecker 
Cc: Namhyung Kim 
Cc: Stephane Eranian 
Cc: Wang Nan 
Cc: pi3or...@163.com
Fixes: 58d4f00ff13f ("perf build: Fix feature_check name clash")
Link: http://lkml.kernel.org/n/tip-dzu1c4sruukgfq5d5b1c4...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/build/Makefile.feature | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index c8fe6d177119..690d5614edd4 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -140,7 +140,7 @@ ifneq ("$(FEATURE_DUMP)","$(FEATURE_DUMP_FILE)")
   feature_display := 1
 endif
 
-feature_display_check = $(eval $(feature_check_code))
+feature_display_check = $(eval $(feature_check_display_code))
 define feature_display_check_code
   ifneq ($(feature-$(1)), 1)
 feature_display := 1
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[GIT PULL 0/9] perf/core improvements and fixes

2015-09-22 Thread Arnaldo Carvalho de Melo
Hi Ingo,

Please consider pulling,

- Arnaldo

The following changes since commit 96f3eda67fcf2598e9d2794398e0e7ab35138ea6:

  perf/x86/intel: Fix static checker warning in lbr enable (2015-09-18 09:24:57 
+0200)

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git 
tags/perf-core-for-mingo

for you to fetch changes up to e803cf97a4f90d31bcc2c9a1ea20fe9cdc12b2f9:

  perf record: Synthesize COMM event for a command line workload (2015-09-22 
22:43:12 -0300)


perf/core improvements and fixes:

User visible:

- Fix a segfault in 'perf probe' when removing uprobe events (Masami Hiramatsu)

- Synthesize COMM event for workloads started from the command line in 'perf
  record' so that we can have the pid->comm mapping before we get the real
  PERF_RECORD_COMM switching from perf to the workload (Namhyung Kim)

- Fix build tools/vm/ due to removal of tools/lib/api/fs/debugfs.h
  (Arnaldo Carvalho de Melo)

Developer stuff:

- Fix the make tarball targets by including the recently added err.h header in
  the perf MANIFEST file (Jiri Olsa)

- Don't assume that the event parser returns a non empty evlist (Wang Nan)

- Add way to disambiguate feature detection state files, needed to use
  tools/build feature detection for multiple components in a single O= output
  dir, which will be the case with tools/perf/ and tools/lib/bpf/
  (Arnaldo Carvalho de Melo)

- Fixup FEATURE_{TESTS,DISPLAY} inversion in tools/lib/bpf/ (Arnaldo Carvalho 
de Melo)

Signed-off-by: Arnaldo Carvalho de Melo 


Arnaldo Carvalho de Melo (5):
  tools build: Fixup feature detection display function name
  tools lib bpf: Fix up FEATURE_{TESTS,DISPLAY} usage
  tools build: Allow setting the feature detection user
  tools lib bpf: Use FEATURE_USER to allow building in the same dir as perf
  tools vm: Fix build due to removal of tools/lib/api/fs/debugfs.h

Jiri Olsa (1):
  perf tools: Add include/err.h into MANIFEST

Masami Hiramatsu (1):
  perf probe: Fix a segfault when removing uprobe events

Namhyung Kim (1):
  perf record: Synthesize COMM event for a command line workload

Wang Nan (1):
  perf tools: Don't assume that the parser returns non empty evsel list

 tools/build/Makefile.feature   |  9 +
 tools/lib/bpf/Makefile |  5 +++--
 tools/perf/MANIFEST|  1 +
 tools/perf/builtin-probe.c |  7 +--
 tools/perf/builtin-record.c| 15 ++-
 tools/perf/util/event.c|  2 +-
 tools/perf/util/event.h|  5 +
 tools/perf/util/parse-events.c | 16 
 tools/vm/page-types.c  |  6 +++---
 9 files changed, 53 insertions(+), 13 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 7/9] perf tools: Add include/err.h into MANIFEST

2015-09-22 Thread Arnaldo Carvalho de Melo
From: Jiri Olsa 

Otherwise the tarpkg is incomplete (tarpkg tests fail).

Signed-off-by: Jiri Olsa 
Cc: David Ahern 
Cc: Namhyung Kim 
Cc: Peter Zijlstra 
Cc: Wang Nan 
Fixes: 01ca9fd41d6f ("tools: Add err.h with ERR_PTR PTR_ERR interface")
Link: http://lkml.kernel.org/r/1442846143-8556-1-git-send-email-jo...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/MANIFEST | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 2a958a80c763..9e6bdf5b2df6 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -50,6 +50,7 @@ tools/include/linux/poison.h
 tools/include/linux/rbtree.h
 tools/include/linux/rbtree_augmented.h
 tools/include/linux/types.h
+tools/include/linux/err.h
 include/asm-generic/bitops/arch_hweight.h
 include/asm-generic/bitops/const_hweight.h
 include/asm-generic/bitops/fls64.h
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 9/9] perf record: Synthesize COMM event for a command line workload

2015-09-22 Thread Arnaldo Carvalho de Melo
From: Namhyung Kim 

When perf creates a new child to profile, the events are enabled on
exec().  And in this case, it doesn't synthesize any event for the
child since they'll be generated during exec().  But there's a window
between the enabling and the event generation.

It used to be overcome since samples are only in kernel (so we always
have the map) and the comm is overridden by a later COMM event.
However it won't work if events are processed and displayed before the
COMM event overrides like in 'perf script'.  This leads to those early
samples (like native_write_msr_safe) not having a comm but pid (like
':15328').

So it needs to synthesize COMM event for the child explicitly before
enabling so that it can have a correct comm.  But at this time, the
comm will be "perf" since it's not exec-ed yet.

Committer note:

Before this patch:

  # perf record usleep 1
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.017 MB perf.data (7 samples) ]
  # perf script --show-task-events
:4429  4429 27909.079372:  1 cycles:  8105f45a 
native_write_msr_safe (/lib/modules/4.
:4429  4429 27909.079375:  1 cycles:  8105f45a 
native_write_msr_safe (/lib/modules/4.
:4429  4429 27909.079376: 10 cycles:  8105f45a 
native_write_msr_safe (/lib/modules/4.
:4429  4429 27909.079377:223 cycles:  8105f45a 
native_write_msr_safe (/lib/modules/4.
:4429  4429 27909.079378:   6571 cycles:  8105f45a 
native_write_msr_safe (/lib/modules/4.
   usleep  4429 27909.079380: PERF_RECORD_COMM exec: usleep:4429/4429
   usleep  4429 27909.079381: 185403 cycles:  810a72d3 
flush_signal_handlers (/lib/modules/4.
   usleep  4429 27909.079444:2241110 cycles:  7fc575355be3 _dl_start 
(/usr/lib64/ld-2.20.so)
   usleep  4429 27909.079875: PERF_RECORD_EXIT(4429:4429):(4429:4429)

After:

  # perf record usleep 1
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.017 MB perf.data (7 samples) ]
  # perf script --show-task
 perf 0 0.00: PERF_RECORD_COMM: perf:8446/8446
 perf  8446 30154.038944:  1 cycles:  8105f45a 
native_write_msr_safe (/lib/modules/4.
 perf  8446 30154.038948:  1 cycles:  8105f45a 
native_write_msr_safe (/lib/modules/4.
 perf  8446 30154.038949:  9 cycles:  8105f45a 
native_write_msr_safe (/lib/modules/4.
 perf  8446 30154.038950:230 cycles:  8105f45a 
native_write_msr_safe (/lib/modules/4.
 perf  8446 30154.038951:   6772 cycles:  8105f45a 
native_write_msr_safe (/lib/modules/4.
   usleep  8446 30154.038952: PERF_RECORD_COMM exec: usleep:8446/8446
   usleep  8446 30154.038954: 196923 cycles:  81766440 
_raw_spin_lock (/lib/modules/4.3.0-rc1
   usleep  8446 30154.039021:2292130 cycles:  7f609a173dc4 memcpy 
(/usr/lib64/ld-2.20.so)
   usleep  8446 30154.039349: PERF_RECORD_EXIT(8446:8446):(8446:8446)
  #

Signed-off-by: Namhyung Kim 
Tested-by: Arnaldo Carvalho de Melo 
Cc: David Ahern 
Cc: Jiri Olsa 
Cc: Peter Zijlstra 
Link: 
http://lkml.kernel.org/r/1442881495-2928-1-git-send-email-namhy...@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/builtin-record.c | 15 ++-
 tools/perf/util/event.c |  2 +-
 tools/perf/util/event.h |  5 +
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 142eeb341b29..a01c8ae1ee07 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -636,8 +636,21 @@ static int __cmd_record(struct record *rec, int argc, 
const char **argv)
/*
 * Let the child rip
 */
-   if (forks)
+   if (forks) {
+   union perf_event event;
+   /*
+* Some H/W events are generated before COMM event
+* which is emitted during exec(), so perf script
+* cannot see a correct process name for those events.
+* Synthesize COMM event to prevent it.
+*/
+   perf_event__synthesize_comm(tool, ,
+   rec->evlist->workload.pid,
+   process_synthesized_event,
+   machine);
+
perf_evlist__start_workload(rec->evlist);
+   }
 
if (opts->initial_delay) {
usleep(opts->initial_delay * 1000);
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 497157affc9c..6214ad47d554 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -167,7 +167,7 @@ static int perf_event__prepare_comm(union perf_event 
*event, pid_t pid,
return 0;
 }
 
-static pid_t perf_event__synthesize_comm(struct perf_tool *tool,
+pid_t perf_event__synthesize_comm(struct perf_tool *tool,

[PATCH 4/9] tools lib bpf: Fix up FEATURE_{TESTS,DISPLAY} usage

2015-09-22 Thread Arnaldo Carvalho de Melo
From: Arnaldo Carvalho de Melo 

When libbpf was introduced it wrongly asked for the "libelf" and "bpf"
feature tests to be performed (via FEATURE_TESTS), while asking for
"libelf", "libelf-mmap", "libelf-getphdrnum" and "bpf" to have the
results of their respective tests displayed (via FEATURE_DISPLAY).

Due to another recently fixed bug in the tools/build/ infrastructure
("tools build: Fixup feature detection display function name") the
results for the entries in the FEATURE_DISPLAY, for this case, were
appearing as all succeeding, when two of them (the ones only on the
DISPLAY) were not even being performed.

Before:

  $ make -C tools/lib/bpf/
  make: Entering directory '/home/git/linux/tools/lib/bpf'
  Auto-detecting system features:
  ...libelf: [ on  ]
  ... libelf-getphdrnum: [ OFF ]
  ...   libelf-mmap: [ OFF ]
  ...   bpf: [ on  ]
  

After, with FEATURE_TESTS == FEATURE_DISPLAY:

  Auto-detecting system features:
  ...libelf: [ on  ]
  ... libelf-getphdrnum: [ on  ]
  ...   libelf-mmap: [ on  ]
  ...   bpf: [ on  ]
  

I just inverted, so that it tests the four features but displays just
the libelf and bpf ones, to make it more compact. So it becomes:

  $ make -C tools/lib/bpf/
  make: Entering directory '/home/git/linux/tools/lib/bpf'

  Auto-detecting system features:
  ...libelf: [ on  ]
  ...   bpf: [ on  ]

Acked-by: Jiri Olsa 
Cc: Adrian Hunter 
Cc: Alexei Starovoitov 
Cc: Borislav Petkov 
Cc: David Ahern 
Cc: Frederic Weisbecker 
Cc: Namhyung Kim 
Cc: Stephane Eranian 
Cc: Wang Nan 
Cc: pi3or...@163.com
Fixes: 1b76c13e4b36 ("bpf tools: Introduce 'bpf' library and add bpf feature 
check")
Link: http://lkml.kernel.org/n/tip-y4bd59e6j9rzzojiyeqrg...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/lib/bpf/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index f68d23a0b487..604c12081b4b 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -64,8 +64,8 @@ srctree := $(patsubst %/,%,$(dir $(srctree)))
 #$(info Determined 'srctree' to be $(srctree))
 endif
 
-FEATURE_DISPLAY = libelf libelf-getphdrnum libelf-mmap bpf
-FEATURE_TESTS = libelf bpf
+FEATURE_TESTS = libelf libelf-getphdrnum libelf-mmap bpf
+FEATURE_DISPLAY = libelf bpf
 
 INCLUDES = -I. -I$(srctree)/tools/include 
-I$(srctree)/arch/$(ARCH)/include/uapi -I$(srctree)/include/uapi
 FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES)
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 8/9] tools vm: Fix build due to removal of tools/lib/api/fs/debugfs.h

2015-09-22 Thread Arnaldo Carvalho de Melo
From: Arnaldo Carvalho de Melo 

There were some changes in how this debugfs mounting helper is
implemented/exported and we forgot to check if there were other users
besides perf, fix it.

Need to do a make -C tools/ every time we do changes to
tools/{lib,include} and other places where we're moving things from
tools/perf/ to be used by other tools/ living code.

Fixed:

  $ make -C tools/vm
  make: Entering directory '/home/git/linux/tools/vm'
  make -C ../lib/api
  make[1]: Entering directory '/home/git/linux/tools/lib/api'
CC   fd/array.o
LD   fd/libapi-in.o
CC   fs/fs.o
CC   fs/tracing_path.o
LD   fs/libapi-in.o
CC   cpu.o
LD   libapi-in.o
AR   libapi.a
  make[1]: Leaving directory '/home/git/linux/tools/lib/api'
  gcc -Wall -Wextra -I../lib/ -o page-types page-types.c ../lib/api/libapi.a
  make: Leaving directory '/home/git/linux/tools/vm'
  $

Reported-by: Vinson Lee 
Tested-by: Vinson Lee 
Cc: Steven Rostedt 
Cc: Jiri Olsa 
Cc: Matt Fleming 
Cc: Raphael Beamonte 
Cc: H. Peter Anvin 
Cc: Peter Zijlstra 
Cc: David Ahern 
Cc: Thomas Gleixner 
Cc: Namhyung Kim 
Fixes: 60a1133a5b39 ("tools lib api fs: Remove debugfs, tracefs and findfs 
objects")
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/vm/page-types.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index 7f73fa32a590..bcf5ec760eb9 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -42,7 +42,7 @@
 #include 
 #include "../../include/uapi/linux/magic.h"
 #include "../../include/uapi/linux/kernel-page-flags.h"
-#include 
+#include 
 
 #ifndef MAX_PATH
 # define MAX_PATH 256
@@ -188,7 +188,7 @@ static int  kpageflags_fd;
 static int opt_hwpoison;
 static int opt_unpoison;
 
-static char*hwpoison_debug_fs;
+static const char  *hwpoison_debug_fs;
 static int hwpoison_inject_fd;
 static int hwpoison_forget_fd;
 
@@ -487,7 +487,7 @@ static void prepare_hwpoison_fd(void)
 {
char buf[MAX_PATH + 1];
 
-   hwpoison_debug_fs = debugfs_mount(NULL);
+   hwpoison_debug_fs = debugfs__mount();
if (!hwpoison_debug_fs) {
perror("mount debugfs");
exit(EXIT_FAILURE);
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 2/9] perf tools: Don't assume that the parser returns non empty evsel list

2015-09-22 Thread Arnaldo Carvalho de Melo
From: Wang Nan 

Don't blindly retrieve and use a last element in the lists returned by
parse_events__scanner(), as it may have collected no entries, i.e.
return an empty list.

Signed-off-by: Wang Nan 
Cc: Alexei Starovoitov 
Cc: Brendan Gregg 
Cc: Daniel Borkmann 
Cc: David Ahern 
Cc: He Kuang 
Cc: Jiri Olsa 
Cc: Kaixu Xia 
Cc: Masami Hiramatsu 
Cc: Namhyung Kim 
Cc: Paul Mackerras 
Cc: Peter Zijlstra 
Cc: Zefan Li 
Cc: pi3or...@163.com
Link: 
http://lkml.kernel.org/r/1441523623-152703-2-git-send-email-wangn...@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/util/parse-events.c | 16 
 1 file changed, 16 insertions(+)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 0fde5293a38e..61c2bc20926d 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -827,6 +827,11 @@ void parse_events__set_leader(char *name, struct list_head 
*list)
 {
struct perf_evsel *leader;
 
+   if (list_empty(list)) {
+   WARN_ONCE(true, "WARNING: failed to set leader: empty list");
+   return;
+   }
+
__perf_evlist__set_leader(list);
leader = list_entry(list->next, struct perf_evsel, node);
leader->group_name = name ? strdup(name) : NULL;
@@ -1176,6 +1181,11 @@ int parse_events(struct perf_evlist *evlist, const char 
*str,
if (!ret) {
struct perf_evsel *last;
 
+   if (list_empty()) {
+   WARN_ONCE(true, "WARNING: event parser found nothing");
+   return -1;
+   }
+
perf_evlist__splice_list_tail(evlist, );
evlist->nr_groups += data.nr_groups;
last = perf_evlist__last(evlist);
@@ -1285,6 +1295,12 @@ foreach_evsel_in_last_glob(struct perf_evlist *evlist,
struct perf_evsel *last = NULL;
int err;
 
+   /*
+* Don't return when list_empty, give func a chance to report
+* error when it found last == NULL.
+*
+* So no need to WARN here, let *func do this.
+*/
if (evlist->nr_entries > 0)
last = perf_evlist__last(evlist);
 
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 5/9] tools build: Allow setting the feature detection user

2015-09-22 Thread Arnaldo Carvalho de Melo
From: Arnaldo Carvalho de Melo 

We will use the tools/build/ autodetection in the eBPF patchkit
and it is currently sharing the output directory with perf, that
also uses the feature detection logic.

As we keep state in the output directory, so that we can avoid running
all the tests again, we need to have different filenames for the files
used in this state, allow doing that via the FEATURE_USER variable, to
be set alongside the existing FEATURE_{TEST,DISPLAY} variables.

v2: Fix comment describing the FEATURE_DUMP filename to make sure where
it is created, precisely at $(OUTPUT)FEATURE-DUMP$(FEATURE_USER).
Pointed out by Jiri.

Acked-by: Jiri Olsa 
Cc: Alexei Starovoitov 
Cc: Adrian Hunter 
Cc: Borislav Petkov 
Cc: David Ahern 
Cc: Frederic Weisbecker 
Cc: Namhyung Kim 
Cc: Stephane Eranian 
Cc: Wang Nan 
Cc: pi3or...@163.com
Link: http://lkml.kernel.org/n/tip-fdbev0vrn3x6idqc3ajbn...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/build/Makefile.feature | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 690d5614edd4..72817e4d5e70 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -121,8 +121,9 @@ define feature_print_text_code
 MSG = $(shell printf '...%30s: %s' $(1) $(2))
 endef
 
+FEATURE_DUMP_FILENAME = $(OUTPUT)FEATURE-DUMP$(FEATURE_USER)
 FEATURE_DUMP := $(foreach 
feat,$(FEATURE_DISPLAY),feature-$(feat)($(feature-$(feat
-FEATURE_DUMP_FILE := $(shell touch $(OUTPUT)FEATURE-DUMP; cat 
$(OUTPUT)FEATURE-DUMP)
+FEATURE_DUMP_FILE := $(shell touch $(FEATURE_DUMP_FILENAME); cat 
$(FEATURE_DUMP_FILENAME))
 
 ifeq ($(dwarf-post-unwind),1)
   FEATURE_DUMP += dwarf-post-unwind($(dwarf-post-unwind-text))
@@ -131,12 +132,12 @@ endif
 # The $(feature_display) controls the default detection message
 # output. It's set if:
 # - detected features differes from stored features from
-#   last build (in FEATURE-DUMP file)
+#   last build (in $(FEATURE_DUMP_FILENAME) file)
 # - one of the $(FEATURE_DISPLAY) is not detected
 # - VF is enabled
 
 ifneq ("$(FEATURE_DUMP)","$(FEATURE_DUMP_FILE)")
-  $(shell echo "$(FEATURE_DUMP)" > $(OUTPUT)FEATURE-DUMP)
+  $(shell echo "$(FEATURE_DUMP)" > $(FEATURE_DUMP_FILENAME))
   feature_display := 1
 endif
 
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 6/9] tools lib bpf: Use FEATURE_USER to allow building in the same dir as perf

2015-09-22 Thread Arnaldo Carvalho de Melo
From: Arnaldo Carvalho de Melo 

When building tools/lib/bpf as part of the tools/perf/ build process,
which will happen when we introduce a patch wiring that up, we end up
stomping on the feature detection caching mechanism, that uses a file in
the output directory (O=) that is shared by libbpf and perf to check if
something changed from one build to another that requires redoing the
feature detection process.

By using the recently introduced FEATURE_USER tools/build/ knob, we can
avoid that:

Before, every make invocation would run the feature detection:

  $ make O=/tmp/build/perf -C tools/perf
  make: Entering directory '/home/git/linux/tools/perf'
  Auto-detecting system features:
  ... dwarf: [ on  ]
  ... glibc: [ on  ]
  
  ... get_cpuid: [ on  ]
  ...   bpf: [ on  ]

GEN  perf-archive
GEN  perf-with-kcore

  Auto-detecting system features:
  ...libelf: [ on  ]
  ...   bpf: [ on  ]
  

After:

  $ make O=/tmp/build/perf -C tools/perf
  make: Entering directory '/home/git/linux/tools/perf'
BUILD:   Doing 'make -j4' parallel build
  make: Leaving directory '/home/git/linux/tools/perf'
  $

Because we now have two different feature detection state files:

  $ ls -la /tmp/build/perf/FEATURE-DUMP*
  -rw-rw-r--. 1 acme acme 338 Sep 21 17:25 /tmp/build/perf/FEATURE-DUMP
  -rw-rw-r--. 1 acme acme  33 Sep 21 17:25 /tmp/build/perf/FEATURE-DUMP.libbpf
  $

Acked-by: Jiri Olsa 
Cc: Adrian Hunter 
Cc: Alexei Starovoitov 
Cc: Borislav Petkov 
Cc: David Ahern 
Cc: Frederic Weisbecker 
Cc: Namhyung Kim 
Cc: Stephane Eranian 
Cc: Wang Nan 
Cc: pi3or...@163.com
Fixes: 1b76c13e4b36 ("bpf tools: Introduce 'bpf' library and add bpf feature 
check")
Link: http://lkml.kernel.org/n/tip-s6ev9wfqy7pvvs58emys2...@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/lib/bpf/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 604c12081b4b..e630f9fc4fb6 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -64,6 +64,7 @@ srctree := $(patsubst %/,%,$(dir $(srctree)))
 #$(info Determined 'srctree' to be $(srctree))
 endif
 
+FEATURE_USER = .libbpf
 FEATURE_TESTS = libelf libelf-getphdrnum libelf-mmap bpf
 FEATURE_DISPLAY = libelf bpf
 
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 1/9] perf probe: Fix a segfault when removing uprobe events

2015-09-22 Thread Arnaldo Carvalho de Melo
From: Masami Hiramatsu 

Fix a segfault bug and a small mistake in perf probe -d.

Since the "ulist" in perf_del_probe_events is never initialized,
strlist__add(ulist, *) always causes a segfault when removing
uprobe events by perf probe -d.

Also, the "str" local variable is never released if we fail to
allocate the "klist". This fixes it too.

This has been introduced by the commit e607f1426b58 ("perf probe:
Print deleted events in cmd_probe()").

Reported-by: Milian Wolff 
Signed-off-by: Masami Hiramatsu 
Cc: Namhyung Kim 
Link: 
http://lkml.kernel.org/r/20150916125241.4446.44805.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo 
---
 tools/perf/builtin-probe.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 94385ee89dc8..f7882ae9ebc6 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -380,8 +380,11 @@ static int perf_del_probe_events(struct strfilter *filter)
goto out;
 
klist = strlist__new(NULL, NULL);
-   if (!klist)
-   return -ENOMEM;
+   ulist = strlist__new(NULL, NULL);
+   if (!klist || !ulist) {
+   ret = -ENOMEM;
+   goto out;
+   }
 
ret = probe_file__get_events(kfd, filter, klist);
if (ret == 0) {
-- 
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [alsa-devel] [PATCH] mfd: arizona: Call the runtime PM function if the state is runtime resumed

2015-09-22 Thread Inha Song
 Hi, Charles,

I saw the log with LOG_DEVICE in regmap. But I'm not sure that the reason 
suspend noirq failed is an IRQ occurring.

Here is my log:
--
root@localhost:~# aplay test.wav 
[   40.831047] arizona spi1.0: Leaving AoD mode
[   40.834448] s3c64xx_spi_runtime_resume
[   40.834876] arizona spi1.0: d23 => 1
[   40.835029] s3c64xx_spi_runtime_suspend
[   40.846040] s3c64xx_spi_runtime_resume
[   40.846423] arizona spi1.0: d23 => 100
[   40.846557] s3c64xx_spi_runtime_suspend
[   40.846771] arizona spi1.0: d04 <= 100
[   40.846956] s3c64xx_spi_runtime_resume
[   40.852145] s3c64xx_spi_runtime_suspend
[   40.852386] s3c64xx_spi_runtime_resume
[   40.852894] s3c64xx_spi_runtime_suspend
[   40.854307] s3c64xx_spi_runtime_resume
[   40.858375] arizona spi1.0: d40 => 3
[   40.858410] s3c64xx_spi_runtime_suspend
[   40.865429] s3c64xx_spi_runtime_resume
[   40.869637] arizona spi1.0: d02 <= 2040
[   40.869671] s3c64xx_spi_runtime_suspend
[   40.876836] s3c64xx_spi_runtime_resume
[   40.880749] arizona spi1.0: Spurious HPDET IRQ
[   40.880773] arizona spi1.0: FLL1: Fref=2400 Fout=135475200
[   40.880787] arizona spi1.0: FLL1: Fvco=90316800Hz
[   40.880793] s3c64xx_spi_runtime_suspend
[   40.880803] arizona spi1.0: FLL1: GCD=19200
[   40.880817] arizona spi1.0: FLL1: N=7 THETA=149 LAMBDA=271
[   40.880831] arizona spi1.0: FLL1: FRATIO=0(0) OUTDIV=2 REFCLK_DIV=1
[   40.880842] arizona spi1.0: FLL1: GAIN=4
[   40.880886] arizona spi1.0: 171 <= 1
[   40.880944] s3c64xx_spi_runtime_resume
[   40.881218] s3c64xx_spi_runtime_suspend
[   40.930152] s3c64xx_spi_runtime_resume
[   40.934071] arizona spi1.0: d26 => 1
[   40.934140] s3c64xx_spi_runtime_suspend
[   40.941237] arizona spi1.0: Mixer dropped sample
[   40.945901] s3c64xx_spi_runtime_resume
[   40.950067] arizona spi1.0: d40 => 3
[   40.950091] s3c64xx_spi_runtime_suspend
[   40.957025] s3c64xx_spi_runtime_resume
[   40.960971] arizona spi1.0: d04 <= 1
[   40.961006] s3c64xx_spi_runtime_suspend
[   40.968106] s3c64xx_spi_runtime_resume
[   40.971956] arizona spi1.0: FLL1: clock OK
[   40.971987] s3c64xx_spi_runtime_suspend
[   40.979751] arizona spi1.0: SYSCLK set to 135475200Hz
[   40.984742] wm5110-codec wm5110-codec: AIF1: BCLK 1411200Hz LRCLK 44100Hz
[   40.992144] arizona spi1.0: SYSCLK set to 135475200Hz
[   40.996646] arizona spi1.0: 101 <= 8644
[   41.000360] s3c64xx_spi_runtime_resume
[   41.004300] arizona spi1.0: 51a <= 1
[   41.004357] s3c64xx_spi_runtime_suspend
[   41.011499] s3c64xx_spi_runtime_resume
[   41.015689] arizona spi1.0: 400 <= 8
[   41.015692] s3c64xx_spi_runtime_suspend
[   41.022581] s3c64xx_spi_runtime_resume
[   41.026735] arizona spi1.0: d40 => 3
[   41.026737] s3c64xx_spi_runtime_suspend
[   41.033687] s3c64xx_spi_runtime_resume
[   41.037765] arizona spi1.0: d02 <= 40
[   41.037800] s3c64xx_spi_runtime_suspend
[   41.044883] s3c64xx_spi_runtime_resume
[   41.049054] arizona spi1.0: d26 => 8
[   41.049072] s3c64xx_spi_runtime_suspend
[   41.056043] arizona spi1.0: ASRC underclocked

[   72.308238] arizona spi1.0: Suspend, disabling IRQ
[   72.320286] arizona spi1.0: 400 <= 0
[   72.320310] s3c64xx_spi_runtime_resume
[   72.336047] arizona spi1.0: 51a <= 0
[   72.336217] arizona spi1.0: 101 <= 8604
[   72.336401] arizona spi1.0: 171 <= 3
[   72.336425] arizona spi1.0: 171 <= 2
[   72.336731] arizona spi1.0: 171 <= 0
[   72.336751] arizona spi1.0: SYSCLK cleared
[   72.338584] arizona spi1.0: SYSCLK cleared
[   72.339123] s3c64xx_spi_suspend
[   72.355866] arizona spi1.0: Late suspend, reenabling IRQ
[   72.355893] >>> noirq failed because of spi1
[   72.355948] arizona spi1.0: Early resume, disabling IRQ
[   72.416798] PM: noirq suspend of devices failed
..

Best Regards,
Inha Song.

On Tue, 22 Sep 2015 08:46:18 +0100
Charles Keepax  wrote:

> On Mon, Sep 21, 2015 at 11:16:42AM +0900, Inha Song wrote:
> > Hi, Charles,
> > 
> > I've already tried to change.
> > If I change to that, we can't enter suspend during playback.
> > 
> > -
> > [   72.538263] arizona spi1.0: Suspend, disabling IRQ
> > ...
> > [   72.585823] arizona spi1.0: Late suspend, reengabling IRQ
> > [   72.585904] arizona spi1.0: Early resume, disabling IRQ
> > ...
> > [   72.646770] PM: noirq suspend of devices failed
> > <- because of "spi1.0" pm_wakeup_pending() in suspend_noirq()
> 
> Ok this looks more reasonable, suspend should fail because we do
> have an IRQ waiting to be handled, so we shouldn't suspend. But
> you are saying this happens every time the CODEC goes to suspend,
> thus blocking the suspend from suspending?
> 
> First step then would be to identify what IRQ is occurring, could
> you set LOG_DEVICE at the top of drivers/base/regmap/regmap.c to
> "spi1.0" and provide a log, so we can see what IRQ is being
> asserted from the CODEC.
> 
> Thanks,
> Charles
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  

RE: [PATCH v10 5/5] QE: Move QE from arch/powerpc to drivers/soc

2015-09-22 Thread Zhao Qiang
On Wed, Sep 23, 2015 at 12:40 AM +0800, Wood Scott-B07421 wrote:

> -Original Message-
> From: Wood Scott-B07421
> Sent: Wednesday, September 23, 2015 12:40 AM
> To: Zhao Qiang-B45475
> Cc: linux-kernel@vger.kernel.org; linuxppc-...@lists.ozlabs.org;
> lau...@codeaurora.org; Xie Xiaobo-R63061; b...@kernel.crashing.org; Li
> Yang-Leo-R58472; pau...@samba.org
> Subject: Re: [PATCH v10 5/5] QE: Move QE from arch/powerpc to drivers/soc
> 
> On Tue, 2015-09-22 at 03:24 -0500, Zhao Qiang-B45475 wrote:
> > On Tue, Sep 22, 2015 at 06:56 AM +0800, Wood Scott-B07421 wrote:
> >
> > > -Original Message-
> > > From: Wood Scott-B07421
> > > Sent: Tuesday, September 22, 2015 6:56 AM
> > > To: Zhao Qiang-B45475
> > > Cc: linux-kernel@vger.kernel.org; linuxppc-...@lists.ozlabs.org;
> > > lau...@codeaurora.org; Xie Xiaobo-R63061; b...@kernel.crashing.org;
> > > Li Yang-Leo-R58472; pau...@samba.org
> > > Subject: Re: [PATCH v10 5/5] QE: Move QE from arch/powerpc to
> > > drivers/soc
> > >
> > > On Fri, Sep 18, 2015 at 03:15:21PM +0800, Zhao Qiang wrote:
> > > > diff --git a/drivers/soc/fsl/qe/Kconfig
> > > > b/drivers/soc/fsl/qe/Kconfig new file mode 100644 index
> > > > 000..3012571
> > > > --- /dev/null
> > > > +++ b/drivers/soc/fsl/qe/Kconfig
> > > > @@ -0,0 +1,33 @@
> > > > + help
> > > > +   The QUICC Engine (QE) is a new generation of communications
> > > > +   coprocessors on Freescale embedded CPUs (akin to CPM in older
> > > chips).
> > > > +   Selecting this option means that you wish to build a kernel
> > > > +   for a machine with a QE coprocessor.
> > > > +
> > > > +config UCC_SLOW
> > > > + bool
> > > > + default y if SERIAL_QE
> > > > + help
> > > > +   This option provides qe_lib support to UCC slow
> > > > +   protocols: UART, BISYNC, QMC
> > > > +
> > > > +config UCC_FAST
> > > > + bool
> > > > + default y if UCC_GETH
> > > > + help
> > > > +   This option provides qe_lib support to UCC fast
> > > > +   protocols: HDLC, Ethernet, ATM, transparent
> > >
> > > What does "qe_lib" mean to the end user, or to anyone after the code
> > > is moved to drivers/soc/fsl/qe?
> >
> > Qe_lib has functions configuring ucc, managing muram and so on.
> > It is the common functions to end user.
> 
> It's not going to be called qe_lib anymore.  s/provides qe_lib support to
> UCC /provides support for QE UCC/

Ok
-Zhao


Re: [PATCH 0/5] Add Marvell berlin4ct clk support

2015-09-22 Thread Jisheng Zhang
+ CLK maintainers

Sorry, yesterday I pressed ENTER too quickly, before the --to list was completed.

On Tue, 22 Sep 2015 22:12:31 +0800
Jisheng Zhang  wrote:

> Add berlin4ct clk driver. The berlin4ct SoC contains:
> 
> two kinds of PLL: normal PLL and AVPLL. The normal PLL support is done.
> The AVPLL support is in TODO list.
> 
> two kinds of clk: normal clk and gate clk. The normal clk supports changing
> divider, selecting clock source, disabling/enabling etc. The gate clk only
> supports disabling/enabling. Both are supported in this series.
> 
> Jisheng Zhang (5):
>   clk: berlin: add common pll driver
>   clk: berlin: add common clk driver for newer SoCs
>   clk: berlin: add clk support for berlin4ct
>   dt-bindings: add binding for marvell berlin4ct SoC
>   arm64: dts: berlin4ct: add pll and clock nodes
> 
>  .../bindings/clock/marvell,berlin4ct.txt   |  38 +
>  arch/arm64/boot/dts/marvell/berlin4ct.dtsi |  38 +
>  drivers/clk/berlin/Makefile|   2 +-
>  drivers/clk/berlin/clk-berlin4ct.c | 164 
> +
>  drivers/clk/berlin/clk.c   | 147 ++
>  drivers/clk/berlin/clk.h   |  38 +
>  drivers/clk/berlin/pll.c   | 119 +++
>  include/dt-bindings/clock/berlin4ct.h  |  56 +++
>  8 files changed, 601 insertions(+), 1 deletion(-)
>  create mode 100644 
> Documentation/devicetree/bindings/clock/marvell,berlin4ct.txt
>  create mode 100644 drivers/clk/berlin/clk-berlin4ct.c
>  create mode 100644 drivers/clk/berlin/clk.c
>  create mode 100644 drivers/clk/berlin/clk.h
>  create mode 100644 drivers/clk/berlin/pll.c
>  create mode 100644 include/dt-bindings/clock/berlin4ct.h
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


How to ensure that different peripherals getting different IOVA address in kernel?

2015-09-22 Thread chenfeng
Hi all,

In the IOMMU architecture, how can two different peripherals share the same 
page table?

In other words, is there a mechanism or structure to make two peripherals get 
completely different addresses?

eg:

peri-A, peri-B and peri-C share the same IOVA address range 0-1G for some 
performance requests.

So A, B and C need to use the same IOVA generator to ensure this, but I can't 
find an architecture to do this.

Any help will be appreciated.

Puck


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


RFC: 32-bit __data_len and REQ_DISCARD+REQ_SECURE

2015-09-22 Thread Grant Grundler
Jens, Ulf,

I've run into a basic issue: BLK_SECDISCARD takes 15-35 minutes to
perform a secure erase of a ~23GB (mostly empty) partition on a 32GB
eMMC part (happens with two vendors). One of the vendors says it
should take less than 60 seconds. I've confirmed erasing 2GB takes
only ~6 seconds - so the vendor estimate seems reasonable.

I'm looking for (a) advice on better v3.18 solutions and (b) to start
discussing future solutions for the upstream kernel.

Two problems:
1) 3.18 kernel block layer wants to split up the transaction into
"max_discard_sectors" chunks. This would be fine if
max_discard_sectors was more than one or two erase groups (EG). For
one of the eMMC parts, EG size is 512KB and thus the kernel sends
46,000 commands (DISCARD w/SECURE_ERASE arg) to the eMMC device. At
the bottom I discuss why this is such a bad idea for SECURE_ERASE.

My hack to fix (1): ignore max_discard_sectors for SECURE_ERASE
(BLK_SECDISCARD):
https://chromium-review.googlesource.com/#/c/301460/

I'm trying to recover the 9 bits that blk_rq_sectors() discards and
don't care about sub-512-byte block offsets.


2) Unfortunately, with the above hack, the block layer is truncating
the request to 2GB. :( Turns out several fields in the block layer are
32-bit. Notably:
struct request.__data_len
struct bvec_iter.bi_size
struct bio_vec.bv_len

This is despite ioctl(BLK_SECDISCARD) having a 64-bit API to pass in
"number of bytes to erase" and the emmc spec allowing 32-bits to
specify the LBA. I looked at changing the above data structures (which
normally only need  20-bits or so) to 64-bit and it's a systemic
change. "Non-trivial" doesn't even begin to describe the scope here.

So I have an even worse hack to use 9 "zero" bits here (and this works
for 3.18 - not sure it would for 4.x kernels):
https://chromium-review.googlesource.com/#/c/301349

9-bits gives me a bit of time until eMMC devices are > 1TB in
capacity. Maybe a few more months. :)


Last code change: I'm also not sure I need to set MMC_CAP2_HC_ERASE_SZ
but I do in this 3rd patch:
   https://chromium-review.googlesource.com/#/c/301461

sdhci-acpi and sdhci-pci drivers already do this. I didn't see an
obvious place to set this for the sdhci-tegra variant or what that
code is required to do/support when setting HC_ERASE_SZ capability.



--- Why is splitting up BLK_SECDISCARD such a bad idea? 
Command overhead in the degenerate case is bad and probably causing
the majority of the performance loss here. I'm assuming the device can
issue lots of erase transactions in parallel. Thus sending a secure
erase command for each erase group of LBAs seems like a recipe for a
very long serial sequence (which is what we are seeing).

I believe there is another problem besides command overhead: migration
of "live" data from one erase group (that we just told the device it
has to erase) to another erase group (that we will tell the device to
erase in the future).

If I'm going to erase 16GB of 32GB device, we have NO clue what data
has to move and where it will move to.  However, If I tell the device
to evacuate and erase a particular physical block, the more chunks I
have to erase, the more opportunity for the evacuated data to land in
an erase group that will get erased in a future command.

Lastly, for Android, the system is in recovery mode and doing nothing
else. So there is no reason to split up the commands into "smaller
chunks" like a normal IO (where it clearly makes sense up to a point).
Secure Erase performance is critical here. The longer Secure Erase
takes, the less likely people will do it. And I don't think the world
needs more papers on linux products that don't properly erase data.

cheers,
grant
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [regression] [git pull] drm for 4.3

2015-09-22 Thread Dave Jones
On Tue, Sep 22, 2015 at 09:15:58AM -0700, Matt Roper wrote:
 > On Tue, Sep 22, 2015 at 05:13:55PM +0200, Daniel Vetter wrote:
 > > On Tue, Sep 22, 2015 at 08:00:17AM -0700, Jesse Barnes wrote:
 > > > Cc'ing Maarten and Matt; I'm guessing this may be related to one of
 > > > their recent patches.
 > 
 > Sounds like this showed up before my recent work, but I think I might
 > have seen similar problems while working on atomic watermarks; the
 > issues I was seeing were because the initial hardware readout could
 > leave primary->visible set to true even when the CRTC was off.  My
 > series (which is still under development) contains this patch to fix
 > that:
 > 
 > http://patchwork.freedesktop.org/patch/59564/
 > 
 > Does applying that help with the problems reported here?

No difference at all for me.

Dave

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC v7 22/41] richacl: Propagate everyone@ permissions to other aces

2015-09-22 Thread J. Bruce Fields
On Wed, Sep 23, 2015 at 03:39:44AM +0200, Andreas Gruenbacher wrote:
> Here are my improvements; hope that helps ...

Yes, looks good, thanks!--b.

> 
> Thanks,
> Andreas
> 
> diff --git a/fs/richacl_compat.c b/fs/richacl_compat.c
> index 9b76fc0..21af9a0 100644
> --- a/fs/richacl_compat.c
> +++ b/fs/richacl_compat.c
> @@ -351,26 +351,26 @@ richacl_propagate_everyone(struct richacl_alloc *alloc)
>   struct richace *ace;
>   unsigned int owner_allow, group_allow;
>  
> - /*
> -  * If the owner mask contains permissions which are not in the group
> -  * mask, the group mask contains permissions which are not in the other
> -  * mask, or the owner class contains permissions which are not in the
> -  * other mask, we may need to propagate permissions up from the
> -  * everyone@ allow ace.  The third condition is implied by the first
> -  * two.
> -  */
> - if (!((acl->a_owner_mask & ~acl->a_group_mask) ||
> -   (acl->a_group_mask & ~acl->a_other_mask)))
> - return 0;
>   if (!acl->a_count)
>   return 0;
>   ace = acl->a_entries + acl->a_count - 1;
>   if (richace_is_inherit_only(ace) || !richace_is_everyone(ace))
>   return 0;
>  
> + /*
> +  * Permissions the owner and group class are granted through the
> +  * trailing everyone@ allow ace.
> +  */
>   owner_allow = ace->e_mask & acl->a_owner_mask;
>   group_allow = ace->e_mask & acl->a_group_mask;
>  
> + /*
> +  * If the group or other masks hide permissions which the owner should
> +  * be allowed, we need to propagate those permissions up.  Otherwise,
> +  * those permissions may be lost when applying the other mask to the
> +  * trailing everyone@ allow ace, or when isolating the group class from
> +  * the other class through additional deny aces.
> +  */
>   if (owner_allow & ~(acl->a_group_mask & acl->a_other_mask)) {
>   /* Propagate everyone@ permissions through to owner@. */
>   who.e_id.special = RICHACE_OWNER_SPECIAL_ID;
> @@ -379,6 +379,11 @@ richacl_propagate_everyone(struct richacl_alloc *alloc)
>   acl = alloc->acl;
>   }
>  
> + /*
> +  * If the other mask hides permissions which the group class should be
> +  * allowed, we need to propagate those permissions up to the owning
> +  * group and to all other members in the group class.
> +  */
>   if (group_allow & ~acl->a_other_mask) {
>   int n;
>  
> @@ -399,16 +404,15 @@ richacl_propagate_everyone(struct richacl_alloc *alloc)
>   richace_is_owner(ace) ||
>   richace_is_group(ace))
>   continue;
> - if (richace_is_allow(ace) || richace_is_deny(ace)) {
> - /*
> -  * Any inserted entry will end up below the
> -  * current entry
> -  */
> - if (__richacl_propagate_everyone(alloc, ace,
> -  group_allow))
> - return -1;
> - acl = alloc->acl;
> - }
> +
> + /*
> +  * Any inserted entry will end up below the current
> +  * entry.
> +  */
> + if (__richacl_propagate_everyone(alloc, ace,
> +  group_allow))
> + return -1;
> + acl = alloc->acl;
>   }
>   }
>   return 0;
> -- 
> 2.4.3
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: Multiple potential races on vma->vm_flags

2015-09-22 Thread Hugh Dickins
On Tue, 22 Sep 2015, Andrey Konovalov wrote:
> On Tue, Sep 22, 2015 at 8:54 PM, Hugh Dickins  wrote:
> > On Tue, 22 Sep 2015, Andrey Konovalov wrote:
> >> If anybody comes up with a patch to fix the original issue I easily
> >> can test it, since I'm hitting "BUG: Bad page state" in a second when
> >> fuzzing with KTSAN and Trinity.
> >
> > This "BUG: Bad page state" sounds more serious, but I cannot track down
> > your report of it: please repost - thanks - though on seeing it, I may
> > well end up with no ideas.
> 
> The report is below.

Thanks.

> 
> I get it after a few seconds of running Trinity on a kernel with KTSAN
> and targeting mlock, munlock and madvise syscalls.
> Sasha also observed a very similar crash a while ago
> (https://lkml.org/lkml/2014/11/6/1055).
> I didn't manage to reproduce this in a kernel build without KTSAN though.
> The idea was that data races KTSAN reports might be the explanation of
> these crashes.
> 
> BUG: Bad page state in process trinity-c15  pfn:281999
> page:ea000a066640 count:0 mapcount:0 mapping:  (null) index:0xd
> flags: 0x228000c(referenced|uptodate|swapbacked|mlocked)
> page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set
> bad because of flags:
> flags: 0x20(mlocked)
> Modules linked in:
> CPU: 3 PID: 11190 Comm: trinity-c15 Not tainted 4.2.0-tsan #1295
> Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011
>  821c3b70  00014741 8800b857f948
>  81e9926c 0003 ea000a066640 8800b857f978
>  811ce045 821c3b70 ea000a066640 0001
> Call Trace:
>  [< inline >] __dump_stack lib/dump_stack.c:15
>  [] dump_stack+0x63/0x81 lib/dump_stack.c:50
>  [] bad_page+0x115/0x1a0 mm/page_alloc.c:409
>  [< inline >] free_pages_check mm/page_alloc.c:731
>  [] free_pages_prepare+0x2f8/0x330 mm/page_alloc.c:922
>  [] free_hot_cold_page+0x51/0x2b0 mm/page_alloc.c:1908
>  [] free_hot_cold_page_list+0x5f/0x100
> mm/page_alloc.c:1956 (discriminator 3)
>  [] release_pages+0x151/0x300 mm/swap.c:967
>  [] __pagevec_release+0x43/0x60 mm/swap.c:984
>  [< inline >] pagevec_release include/linux/pagevec.h:69
>  [] shmem_undo_range+0x4fa/0x9d0 mm/shmem.c:446
>  [] shmem_truncate_range+0x2f/0x60 mm/shmem.c:540
>  [] shmem_fallocate+0x555/0x6e0 mm/shmem.c:2086
>  [] vfs_fallocate+0x1e0/0x310 fs/open.c:303
>  [< inline >] madvise_remove mm/madvise.c:326
>  [< inline >] madvise_vma mm/madvise.c:378
>  [< inline >] SYSC_madvise mm/madvise.c:528
>  [] SyS_madvise+0x378/0x760 mm/madvise.c:459
>  [] ? kt_atomic64_store+0x76/0x130 
> mm/ktsan/sync_atomic.c:161
>  [] entry_SYSCALL_64_fastpath+0x31/0x95
> arch/x86/entry/entry_64.S:188
> Disabling lock debugging due to kernel taint

This is totally untested, and one of you may quickly prove me wrong;
but I went in to fix your "Bad page state (mlocked)" by holding pte
lock across the down_read_trylock of mmap_sem in try_to_unmap_one(),
then couldn't see why it would need mmap_sem at all, given how mlock
and munlock first assert intention by setting or clearing VM_LOCKED
in vm_flags, then work their way up the vma, taking pte locks.

Calling mlock_vma_page() under pte lock may look suspicious
at first: but what it does is similar to clear_page_mlock(),
which we regularly call under pte lock from page_remove_rmap().

I'd rather wait to hear whether this appears to work in practice,
and whether you agree that it should work in theory, before writing
the proper description.  I'd love to lose that down_read_trylock.

You mention how Sasha hit the "Bad page state (mlocked)" back in
November: that was one of the reasons we reverted Davidlohr's
i_mmap_lock_read to i_mmap_lock_write in unmap_mapping_range(),
without understanding why it was needed.  Yes, it would lock out
a concurrent try_to_unmap(), whose setting of PageMlocked was not
sufficiently serialized by the down_read_trylock of mmap_sem.

But I don't remember the other reasons for that revert (and
haven't looked very hard as yet): anyone else remember?

Not-yet-Signed-off-by: Hugh Dickins 
---

 mm/rmap.c |   32 +++-
 1 file changed, 7 insertions(+), 25 deletions(-)

--- 4.3-rc2/mm/rmap.c   2015-09-12 18:30:20.857039763 -0700
+++ linux/mm/rmap.c 2015-09-22 17:47:43.489096676 -0700
@@ -1314,9 +1314,12 @@ static int try_to_unmap_one(struct page
 * skipped over this mm) then we should reactivate it.
 */
if (!(flags & TTU_IGNORE_MLOCK)) {
-   if (vma->vm_flags & VM_LOCKED)
-   goto out_mlock;
-
+   if (vma->vm_flags & VM_LOCKED) {
+   /* Holding pte lock, we do *not* need mmap_sem here */
+   mlock_vma_page(page);
+   ret = SWAP_MLOCK;
+   goto out_unmap;
+   }
if (flags & TTU_MUNLOCK)
goto 

Re: [RFC v7 22/41] richacl: Propagate everyone@ permissions to other aces

2015-09-22 Thread Andreas Gruenbacher
Here are my improvements; hope that helps ...

Thanks,
Andreas

diff --git a/fs/richacl_compat.c b/fs/richacl_compat.c
index 9b76fc0..21af9a0 100644
--- a/fs/richacl_compat.c
+++ b/fs/richacl_compat.c
@@ -351,26 +351,26 @@ richacl_propagate_everyone(struct richacl_alloc *alloc)
struct richace *ace;
unsigned int owner_allow, group_allow;
 
-   /*
-* If the owner mask contains permissions which are not in the group
-* mask, the group mask contains permissions which are not in the other
-* mask, or the owner class contains permissions which are not in the
-* other mask, we may need to propagate permissions up from the
-* everyone@ allow ace.  The third condition is implied by the first
-* two.
-*/
-   if (!((acl->a_owner_mask & ~acl->a_group_mask) ||
- (acl->a_group_mask & ~acl->a_other_mask)))
-   return 0;
if (!acl->a_count)
return 0;
ace = acl->a_entries + acl->a_count - 1;
if (richace_is_inherit_only(ace) || !richace_is_everyone(ace))
return 0;
 
+   /*
+* Permissions the owner and group class are granted through the
+* trailing everyone@ allow ace.
+*/
owner_allow = ace->e_mask & acl->a_owner_mask;
group_allow = ace->e_mask & acl->a_group_mask;
 
+   /*
+* If the group or other masks hide permissions which the owner should
+* be allowed, we need to propagate those permissions up.  Otherwise,
+* those permissions may be lost when applying the other mask to the
+* trailing everyone@ allow ace, or when isolating the group class from
+* the other class through additional deny aces.
+*/
if (owner_allow & ~(acl->a_group_mask & acl->a_other_mask)) {
/* Propagate everyone@ permissions through to owner@. */
who.e_id.special = RICHACE_OWNER_SPECIAL_ID;
@@ -379,6 +379,11 @@ richacl_propagate_everyone(struct richacl_alloc *alloc)
acl = alloc->acl;
}
 
+   /*
+* If the other mask hides permissions which the group class should be
+* allowed, we need to propagate those permissions up to the owning
+* group and to all other members in the group class.
+*/
if (group_allow & ~acl->a_other_mask) {
int n;
 
@@ -399,16 +404,15 @@ richacl_propagate_everyone(struct richacl_alloc *alloc)
richace_is_owner(ace) ||
richace_is_group(ace))
continue;
-   if (richace_is_allow(ace) || richace_is_deny(ace)) {
-   /*
-* Any inserted entry will end up below the
-* current entry
-*/
-   if (__richacl_propagate_everyone(alloc, ace,
-group_allow))
-   return -1;
-   acl = alloc->acl;
-   }
+
+   /*
+* Any inserted entry will end up below the current
+* entry.
+*/
+   if (__richacl_propagate_everyone(alloc, ace,
+group_allow))
+   return -1;
+   acl = alloc->acl;
}
}
return 0;
-- 
2.4.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] kexec: Add prefix "kexec" to output message

2015-09-22 Thread Dave Young
On 09/23/15 at 12:07am, Minfei Huang wrote:
> On 09/15/15 at 11:08am, Minfei Huang wrote:
> > On 09/14/15 at 04:44pm, Dave Young wrote:
> > > On 09/14/15 at 03:50pm, Minfei Huang wrote:
> > > > On 09/13/15 at 11:52am, Eric W. Biederman wrote:
> > > > > Minfei Huang  writes:
> > > > > 
> > > > > > kexec output message misses the prefix "kexec", when Dave Young 
> > > > > > split
> > > > > > the kexec code. To keep the same format, add the prefix "kexec" to
> > > > > > output message.
> > > > > 
> > > > > What of kexec_core? What of the messages that already have a prefix?
> > > > > 
> > > > 
> > > > Hi, Eric.
> > > > 
> > > > Last commit(2965fa), Dave Young (dyoung@redhat.com) split the previous
> > > > kernel/kexec.c into three pieces(kexec_core.c, kexec_file.c, kexec.c).
> > > > The common functions used by both kexec and kexec_file are placed in
> > > > file kernel/kexec_core.c.
> > > > 
> > > > The format of the output message likes "kexec: SYSC_kexec_load: hello,
> > > > world" previously. Due to the missing prefix "kexec", now it like
> > > > "SYSC_kexec_load: hello, world".
> > > > 
> > > > Dave Young misses the Macro pr_fmt to define the prefix output message
> > > > in file kexec.c and kexec_file.c. I think the previous Macro was moved
> > > > into the file kexec_core.c when Dave did the splitting.
> > > 
> > > I'm not sure it is proper to add prefix "kexec: " in all kexec*.c, so
> > > only keep it in kexec_core.c.
> > > 
> > > There's already printks with prefix like "Kexec:", "crashkernel:" and
> > > other strings. Adding another prefix before them looks odd. So either
> > > remove the prefix in kexec_core, or remove the other prefixes that
> > > already exist in kexec*.c.
> > > I would prefer to remove the "kexec:" prefix in kexec_core.c
> > 
> > Hi, Dave.
> > 
> > How about removing all of the prefix "crashkernel" in kexec_core. Thus
> > we can be consistent with the output message prefix "kexec".
> 
> Ping, any comment is appreciated and helpful.

Removing "crashkernel" doesn't sound like the proper way; it indicates
crashkernel parsing messages. I have no idea what the best way is, but the
modification below sounds better to me:

kexec_core.c:

#define pr_fmt(fmt)"[kexec_core] " fmt
Also remove below prefix "Kexec:"
pr_warn("Kexec: Memory allocation for saving cpu register states failed\n"); 

kexec.c:
#define pr_fmt(fmt)"[kexec] " fmt

kexec_file.c:
#define pr_fmt(fmt)"[kexec_file] " fmt

Thanks
Dave
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v9 12/18] vfio: Register/unregister irq_bypass_producer

2015-09-22 Thread Eric Auger
Hi Feng,
On 09/22/2015 11:01 AM, Wu, Feng wrote:
> 
> 
>> -Original Message-
>> From: linux-kernel-ow...@vger.kernel.org
>> [mailto:linux-kernel-ow...@vger.kernel.org] On Behalf Of Eric Auger
>> Sent: Tuesday, September 22, 2015 3:46 AM
>> To: Paolo Bonzini; Wu, Feng; alex.william...@redhat.com; j...@8bytes.org;
>> mtosa...@redhat.com
>> Cc: k...@vger.kernel.org; io...@lists.linux-foundation.org;
>> linux-kernel@vger.kernel.org
>> Subject: Re: [PATCH v9 12/18] vfio: Register/unregister irq_bypass_producer
>>
>> Hi,
>> On 09/21/2015 03:02 PM, Paolo Bonzini wrote:
>>>
>>>
>>> On 21/09/2015 14:53, Wu, Feng wrote:
 I think the point is that we cannot trigger the build of irqbypass
 manager inside KVM or VFIO, we need trigger the build at a high
 level and it should be built before VFIO and KVM. Any ideas?
>>
>> We can add virt/Makefile and build virt/lib/ directly, not through
>> arch/x86/kvm.
 Thinking about this more, does that mean we need to add the virt directory
 in the top Makefile in Linux tree?
>>>
>>> Yes, it does.
>> So I understand this will replace patches 2 & 3 then and will fix the
>> arm64 issue then.
> 
> I just sent a patch to fix this build error. BTW, from Paolo's reply, it seems
> he dropped patch 3 in this series; maybe he thinks it doesn't have much
> to do with the other patches, so maybe you could include it in your series
> when the forwarded irq work is ready.
OK no problem. I moved that patch (modified according to the new
compilation scheme) in the irq forwarding series.

Thanks

Eric
> 
> Thanks,
> Feng
> 
>>
>> Thanks
>>
>> Eric
>>
>>>
>>> Paolo
>>>
>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
>> the body of a message to majord...@vger.kernel.org
>> More majordomo info at  http://vger.kernel.org/majordomo-info.html
>> Please read the FAQ at  http://www.tux.org/lkml/

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] f2fs: fix incorrect bimodal calculation

2015-09-22 Thread Chao Yu
In update_sit_info, we use div_u64 to handle the 'u64 divided by u64' case,
but div_u64 can only handle a 32-bit divisor, so our u64 divisor passed to
div_u64 will overflow, resulting in a wrong calculation when showing the
debug info of f2fs, as below:

BDF: 464, avg. vblocks: 23509
(BDF should never exceed 100)

So change to use div64_u64 to handle this case correctly.

Signed-off-by: Chao Yu 
---
 fs/f2fs/debug.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index ebfcc40..615a307 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -118,7 +118,7 @@ static void update_sit_info(struct f2fs_sb_info *sbi)
}
}
dist = div_u64(MAIN_SECS(sbi) * hblks_per_sec * hblks_per_sec, 100);
-   si->bimodal = div_u64(bimodal, dist);
+   si->bimodal = div64_u64(bimodal, dist);
if (si->dirty_count)
si->avg_vblocks = div_u64(total_vblocks, ndirty);
else
-- 
2.5.2


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] Input: elan_i2c - Add all valid ic type for i2c/smbus

2015-09-22 Thread duson
Signed-off-by: Duson Lin 
---
 drivers/input/mouse/elan_i2c_core.c |   11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/input/mouse/elan_i2c_core.c 
b/drivers/input/mouse/elan_i2c_core.c
index 16ac595..357670e 100644
--- a/drivers/input/mouse/elan_i2c_core.c
+++ b/drivers/input/mouse/elan_i2c_core.c
@@ -98,16 +98,25 @@ static int elan_get_fwinfo(u8 iap_version, u16 
*validpage_count,
   u16 *signature_address)
 {
switch (iap_version) {
+   case 0x00:
+   case 0x06:
case 0x08:
*validpage_count = 512;
break;
-   case 0x09:
case 0x03:
+   case 0x07:
+   case 0x09:
+   case 0x0A:
+   case 0x0B:
+   case 0x0C:
*validpage_count = 768;
break;
case 0x0D:
*validpage_count = 896;
break;
+   case 0x0E:
+   *validpage_count = 640;
+   break;
default:
/* unknown ic type clear value */
*validpage_count = 0;
-- 
1.7.9.5


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [RFC v7 22/41] richacl: Propagate everyone@ permissions to other aces

2015-09-22 Thread Andreas Gruenbacher
2015-09-21 21:24 GMT+02:00 J. Bruce Fields :
> On Fri, Sep 18, 2015 at 05:56:11PM -0400, bfields wrote:
>> On Sat, Sep 05, 2015 at 12:27:17PM +0200, Andreas Gruenbacher wrote:
>> > +   /*
>> > +* If the owner mask contains permissions which are not in the group
>> > +* mask, the group mask contains permissions which are not in the other
>> > +* mask, or the owner class contains permissions which are not in the
>>
>> s/owner class/owner mask?
>>
>> > +* other mask, we may need to propagate permissions up from the
>> > +* everyone@ allow ace.  The third condition is implied by the first
>> > +* two.
>> > +*/
>> > +   if (!((acl->a_owner_mask & ~acl->a_group_mask) ||
>> > + (acl->a_group_mask & ~acl->a_other_mask)))
>> > +   return 0;
>>
>> The code looks right, but I don't understand the preceding comment.
>>
>> For example,
>>
>>   owner mask: rw
>>   group mask:  wx
>>   other mask: rw
>>
>> satisfies the first two conditions, but not the third.
>>
>> Also, I don't understand why the first condition would imply that we
>> might need to propagate permissions.
>
> OK, maybe I get the part about the owner mask containing permissions
> not in the group mask: we'll need to insert a deny ace for the bits in
> the other mask but not in the group mask, and then we'll need an allow
> ace for the owner to get those bits back.  I think?

That is indeed the reason, and it also seems clear that this wasn't
documented well enough. Let me remove the offending comment and tiny
optimization, and add better comments instead.

>> > +   if (richace_is_allow(ace) || richace_is_deny(ace)) {
>
> The v4 spec allows aces other than allow and deny aces (audit and
> alarm), but I didn't think you were implementing those.

Right, I don't see that happening. I'll remove that as well.

Thanks,
Andreas
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] perf probe: Fix module probing with shortname

2015-09-22 Thread Wangnan (F)



On 2015/9/22 21:35, Arnaldo Carvalho de Melo wrote:

Em Tue, Sep 22, 2015 at 03:34:32AM +, Wang Nan escreveu:

After commit 3d39ac538629e4f00a6e1c38d46346f1b8e69505 ("perf machine:
No need to have two DSOs lists"), perf probe with module short name doesn't
work again. For example:

  # lsmod | grep e1000e
  e1000e233472  0

  # cat /proc/modules | grep e1000e
  e1000e 233472 0 - Live 0xa0073000

  # cat /proc/kallsyms | grep '\<e1000e_up\>'
  a0093860 t e1000e_up[e1000e]

  # perf probe -v -m e1000e --add e1000e_up
  probe-definition(0): e1000e_up
  symbol:e1000e_up file:(null) line:0 offset:0 return:0 lazy:(null)
  0 arguments
  Failed to find module e1000e.
  Could not open debuginfo. Try to use symbols.
  Looking at the vmlinux_path (7 entries long)
  Using /lib/modules/4.2.0-rc7+/build/vmlinux for symbols
  e1000e_up is out of .text, skip it.
Error: Failed to add events. Reason: No such file or directory (Code: -2)

This is caused by a misunderstanding of dso->kernel in kernel_get_module_dso():
for a kernel module, dso->kernel is DSO_TYPE_USER. dso->kernel is DSO_TYPE_KERNEL
iff dso is vmlinux.

Kernel modules having DSO_TYPE_USER seems to be the bug, no? I'll try to
check that...


I also noticed this problem when I was working on commit
1f121b03d058dd07199d8924373d3c52a207f63b ("perf tools: Deal with kernel
module names in '[]' correctly") ;)


It should be a bug, but I think fixing it is costly. There is an assumption
in the code that, if dso->kernel is not zero, the dso should be vmlinux (not
a kernel module):

$ grep 'dso.>kernel)' ./tools/perf/ -r
./tools/perf/builtin-inject.c:if (dso->kernel)
./tools/perf/util/symbol.c:if (dso->kernel) {
./tools/perf/util/symbol-elf.c:if (dso->kernel)
./tools/perf/util/symbol-elf.c:if (remap_kernel && 
dso->kernel) {

./tools/perf/util/event.c:if (pos->dso->kernel)
./tools/perf/util/probe-event.c:if (dso->kernel)
./tools/perf/util/map.c: * map->dso->kernel) before calling 
__map__is_{kernel,kmodule}())

./tools/perf/util/map.c:if (!map->dso || !map->dso->kernel) {
./tools/perf/builtin-top.c:if (!map->dso->kernel)

So care must be taken.

Another solution seems simpler: we can redefine the meaning of enum 
dso_kernel_type like this:


# find  ./tools/perf/ -type f | xargs -n1 sed -i 
's/DSO_TYPE_USER/DSO_TYPE_NOT_VMLINUX/g'
# find  ./tools/perf/ -type f | xargs -n1 sed -i 
's/DSO_TYPE_KERNEL/DSO_TYPE_VMLINUX/g'
# find  ./tools/perf/ -type f | xargs -n1 sed -i 
's/DSO_TYPE_GUEST_KERNEL/DSO_TYPE_GUEST_VMLINUX/g'


By fixing the name of DSO_TYPE_USER, a kernel module with DSO_TYPE_NOT_VMLINUX
seems not so buggy. (Please choose a better name...)

What's your opinion?

Thank you.


- Arnaldo


This patch fixes 'perf probe -m' in an ad-hoc way.

After this patch:

  # perf probe -v -m e1000e --add e1000e_up
  probe-definition(0): e1000e_up
  symbol:e1000e_up file:(null) line:0 offset:0 return:0 lazy:(null)
  0 arguments
  Open Debuginfo file: 
/lib/modules/4.2.0-rc7+/kernel/drivers/net/ethernet/intel/e1000e/e1000e.ko
  Try to find probe point from debuginfo.
  Matched function: e1000e_up
  Probe point found: e1000e_up+0
  Found 1 probe_trace_events.
  Opening /sys/kernel/debug/tracing//kprobe_events write=1
  Writing event: p:probe/e1000e_up e1000e:e1000e_up+0
  Added new event:
probe:e1000e_up  (on e1000e_up in e1000e)

  You can now use it in all perf tools, such as:

perf record -e probe:e1000e_up -aR sleep 1

  # perf probe -l
  Failed to find debug information for address a0093860
probe:e1000e_up  (on e1000e_up in e1000e)

Signed-off-by: Wang Nan 
Cc: Arnaldo Carvalho de Melo 
Cc: Namhyung Kim 
Cc: Jiri Olsa 
Cc: Masami Hiramatsu 
---

I think there may be other places where dso->kernel is misused.
machine__process_kernel_mmap_event() may be one of them. If I understand
correctly, 'dso->kernel && is_kernel_module(dso->long_name)' should always
be false theoretically. However, I don't have enough time to check whether
that code really causes a problem.

---
  tools/perf/util/probe-event.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 2b78e8f..c7d6d3d 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -270,7 +270,7 @@ static int kernel_get_module_dso(const char *module, struct 
dso **pdso)
  
  	if (module) {

list_for_each_entry(dso, _machine->dsos.head, node) {
-   if (!dso->kernel)
+   if (dso->kernel)
continue;
if (strncmp(dso->short_name + 1, module,
dso->short_name_len - 2) == 0)
--
1.8.3.4



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Re: [PATCH] net: davinci_emac: Add support for fixed-link PHY

2015-09-22 Thread Tony Lindgren
* Neil Armstrong  [150922 02:01]:
> In case the DaVinci Emac is directly connected to a
> non-mdio PHY/device, it should be possible to provide
> a fixed link configuration in the DT.
> 
> Signed-off-by: Neil Armstrong 

Ethernet works for me with this patch:

Tested-by: Tony Lindgren 

> ---
>  drivers/net/ethernet/ti/davinci_emac.c | 8 ++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/ethernet/ti/davinci_emac.c 
> b/drivers/net/ethernet/ti/davinci_emac.c
> index aeebc0a..6521dfb 100644
> --- a/drivers/net/ethernet/ti/davinci_emac.c
> +++ b/drivers/net/ethernet/ti/davinci_emac.c
> @@ -1861,8 +1861,12 @@ davinci_emac_of_get_pdata(struct platform_device 
> *pdev, struct emac_priv *priv)
>   pdata->no_bd_ram = of_property_read_bool(np, "ti,davinci-no-bd-ram");
> 
>   priv->phy_node = of_parse_phandle(np, "phy-handle", 0);
> - if (!priv->phy_node)
> - pdata->phy_id = NULL;
> + if (!priv->phy_node) {
> + if (!of_phy_is_fixed_link(np))
> + pdata->phy_id = NULL;
> + else if (of_phy_register_fixed_link(np) >= 0)
> + priv->phy_node = of_node_get(np);
> + }
> 
>   auxdata = pdev->dev.platform_data;
>   if (auxdata) {
> -- 
> 1.9.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


I am inspired to send you this email

2015-09-22 Thread Steve
Hello

I am inspired to send you this email by the huge opportunity that it will be of 
mutual benefit to us.My late client a national of Israel a Man who used to work 
with Shell before his untimely death few years ago and inquiries to several 
embassies to locate any of my clients extended relatives proved abortive.I 
decide to trace his relatives over the Internet . Still no success.

Hence,this contact to you.I contacted you only to assist in repatriating the 
sum involved otherwise the estate he left behind will be declared unserviceable 
by the bank which just issued me a notice to provide the next of kin or have 
the account frozen.I now use this medium to seek your consent so that the 
proceeds can be paid to you since I have all the necessary legal documents to 
back up our claim.

I guarantee you that this will be executed under a legitimate arrangement that 
will protect you and I from any breach of the law.All I require is your honest 
cooperation to enable us redeem the funds.

Kindly reply back if this proposal satisfies your interest.

Steven Mark.
Email:stevemark...@yahoo.co.uk
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v5 1/6] power: bq27x00_battery: Renaming for consistency

2015-09-22 Thread Tony Lindgren
* Sebastian Reichel  [150922 14:04]:
> Hi Tony and Guan,
> 
> I plan to merge the following patch, which changes
> CONFIG_BATTERY_BQ27x00 to CONFIG_BATTERY_BQ27xxx.
> This includes changes to omap2plus_defconfig and
> unicore32. Can you Ack this patch?

For omap2plus_defconfig:

Acked-by: Tony Lindgren 
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] sched: fix task and run queue run_delay inconsistencies

2015-09-22 Thread Meyer, Mike
During evaluation of some performance data, it was discovered thread
and run queue run_delay accounting data was inconsistent with the other
accounting data that was collected.  Further investigation found under
certain circumstances execution time was leaking into the task and
run queue accounting of run_delay.

Consider the following sequence:

a. thread is running.
b. thread moves between cgroups, changes scheduling class or priority.
c. thread sleeps OR
d. thread involuntarily gives up cpu.

a. implies:

thread->sched_info.last_queued = 0

a. and b. results in the following:

1. dequeue_task(rq, thread)

   sched_info_dequeued(rq, thread)
   delta = 0

   sched_info_reset_dequeued(thread)
   thread->sched_info.last_queued = 0

   thread->sched_info.run_delay += delta

2. enqueue_task(rq, thread)

   sched_info_queued(rq, thread)

   /* thread is still on cpu at this point. */
   thread->sched_info.last_queued = task_rq(thread)->clock;

c. results in:

dequeue_task(rq, thread)

sched_info_dequeued(rq, thread)

/* delta is execution time not run_delay. */
delta = task_rq(thread)->clock - thread->sched_info.last_queued

sched_info_reset_dequeued(thread)
thread->sched_info.last_queued = 0

thread->sched_info.run_delay += delta

Since thread was running between enqueue_task(rq, thread) and
dequeue_task(rq, thread), the delta above is really execution
time and not run_delay.

d. results in:

__sched_info_switch(thread, next_thread)

sched_info_depart(rq, thread)

sched_info_queued(rq, thread)

/* last_queued not updated due to being non-zero */
return

Since thread was running between enqueue_task(rq, thread) and
__sched_info_switch(thread, next_thread), the execution time
between enqueue_task(rq, thread) and
__sched_info_switch(thread, next_thread) now will become
associated with run_delay due to when last_queued was last updated.

The proposed patch addresses the issue by calling
sched_info_reset_dequeued(thread) following the call to
enqueue_task(rq, thread) for running threads in situations in which
thread->sched_info.last_queued should remain 0.

Signed-off-by: Mike Meyer 
---
 kernel/sched/core.c | 36 ++--
 1 file changed, 30 insertions(+), 6 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2f9c928..88bfe43 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1187,8 +1187,12 @@ void do_set_cpus_allowed(struct task_struct *p, const 
struct cpumask *new_mask)
 
if (running)
p->sched_class->set_curr_task(rq);
-   if (queued)
+   if (queued) {
enqueue_task(rq, p, 0);
+
+   if (running)
+   sched_info_reset_dequeued(p);
+   }
 }
 
 /*
@@ -3378,9 +3382,13 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 
if (running)
p->sched_class->set_curr_task(rq);
-   if (queued)
+   if (queued) {
enqueue_task(rq, p, enqueue_flag);
 
+   if (running)
+   sched_info_reset_dequeued(p);
+   }
+
check_class_changed(rq, p, prev_class, oldprio);
 out_unlock:
preempt_disable(); /* avoid rq from going away on us */
@@ -3393,7 +3401,7 @@ out_unlock:
 
 void set_user_nice(struct task_struct *p, long nice)
 {
-   int old_prio, delta, queued;
+   int old_prio, delta, queued, running;
unsigned long flags;
struct rq *rq;
 
@@ -3415,6 +3423,7 @@ void set_user_nice(struct task_struct *p, long nice)
goto out_unlock;
}
queued = task_on_rq_queued(p);
+   running = task_current(rq, p);
if (queued)
dequeue_task(rq, p, 0);
 
@@ -3426,11 +3435,15 @@ void set_user_nice(struct task_struct *p, long nice)
 
if (queued) {
enqueue_task(rq, p, 0);
+
+   if (running)
+   sched_info_reset_dequeued(p);
+
/*
 * If the task increased its priority or is running and
 * lowered its priority, then reschedule its CPU:
 */
-   if (delta < 0 || (delta > 0 && task_running(rq, p)))
+   if (delta < 0 || (delta > 0 && running))
resched_curr(rq);
}
 out_unlock:
@@ -3945,6 +3958,9 @@ change:
 * increased (user space view).
 */
enqueue_task(rq, p, oldprio <= p->prio ? ENQUEUE_HEAD : 0);
+
+   if (running)
+   sched_info_reset_dequeued(p);
}
 
check_class_changed(rq, p, prev_class, oldprio);
@@ -5093,8 +5109,12 @@ void sched_setnuma(struct task_struct *p, int nid)
 
if (running)
   

Re: [PATCH v3 0/9] Broadcom Cygnus device tree changes

2015-09-22 Thread Florian Fainelli
On 21/09/15 15:12, Ray Jui wrote:
> This patch series cleans up the Broadcom Cygnus device tree files and makes it
> more consistent with the rest of Broadcom iProc device tree files. This patch
> series also enables various peripherals on Cygnus boards. They include:
> 
> bcm11360_entphn:
> NAND
> 
> bcm958300k:
> touchscreen
> 
> bcm958305k:
> I2C, PCIe, NAND, touchscreen
> 
> Code is based on v4.3-rc1 and is available on GITHUB:
> https://github.com/Broadcom/cygnus-linux/tree/cygnus-dt-v3
> 
> Changes from V2:
>  - Drop PCIe device node change that removes the I/O resource
>  - Set up appropriate address range for the 'core' bus
>  - Rename the 'soc' bus node to 'axi'
>  - Remove incorrect 3rd compatible string 'brcm,brcmnand' in the NAND node
> 
> Changes from V1:
>  - Break the major clean up change into separate patches

Series applied, thanks!
-- 
Florian
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v11 2/4] fpga manager: add sysfs interface document

2015-09-22 Thread Moritz Fischer
On Tue, Sep 22, 2015 at 8:21 AM,   wrote:
> From: Alan Tull 
>
> Add documentation under drivers/staging for new fpga manager's
> sysfs interface.
>
Reviewed-by: Moritz Fischer 
> Signed-off-by: Alan Tull 
> ---
> v5  : (actually second version, but keeping version numbers
>   aligned with rest of patch series)
>   Move document to drivers/staging/fpga/Documentation/ABI
>
> v6  : No change in this patch for v6 of the patch set
> v7  : No change in this patch for v7 of the patch set
> v8  : No change in this patch for v8 of the patch set
>
> v9  : Remove 'firmware' and 'reset' files
>   Update state strings
>
> v10 : Clarifications about state attribute
>   Move to Documentation/ABI/testing/
>
> v11 : No change in this patch for v11 of the patch set
> ---
>  Documentation/ABI/testing/sysfs-class-fpga-manager |   37 
> 
>  1 file changed, 37 insertions(+)
>  create mode 100644 Documentation/ABI/testing/sysfs-class-fpga-manager
>
> diff --git a/Documentation/ABI/testing/sysfs-class-fpga-manager 
> b/Documentation/ABI/testing/sysfs-class-fpga-manager
> new file mode 100644
> index 000..23056c5
> --- /dev/null
> +++ b/Documentation/ABI/testing/sysfs-class-fpga-manager
> @@ -0,0 +1,37 @@
> +What:  /sys/class/fpga_manager//name
> +Date:  August 2015
> +KernelVersion: 4.3
> +Contact:   Alan Tull 
> +Description:   Name of low level fpga manager driver.
> +
> +What:  /sys/class/fpga_manager//state
> +Date:  August 2015
> +KernelVersion: 4.3
> +Contact:   Alan Tull 
> +Description:   Read fpga manager state as a string.
> +   The intent is to provide enough detail that if something goes
> +   wrong during FPGA programming (something that the driver can't
> +   fix) then userspace can know, i.e. if the firmware request
> +   fails, that could be due to not being able to find the 
> firmware
> +   file.
> +
> +   This is a superset of FPGA states and fpga manager driver
> +   states.  The fpga manager driver is walking through these 
> steps
> +   to get the FPGA into a known operating state.  It's a 
> sequence,
> +   though some steps may get skipped.  Valid FPGA states will 
> vary
> +   by manufacturer; this is a superset.
> +
> +   * unknown   = can't determine state
> +   * power off = FPGA power is off
> +   * power up  = FPGA reports power is up
> +   * reset = FPGA held in reset state
> +   * firmware request  = firmware class request in progress
> +   * firmware request error = firmware request failed
> +   * write init= preparing FPGA for programming
> +   * write init error  = Error while preparing FPGA for
> + programming
> +   * write = FPGA ready to receive image data
> +   * write error   = Error while programming
> +   * write complete= Doing post programming steps
> +   * write complete error  = Error while doing post programming
> +   * operating = FPGA is programmed and operating
> --
> 1.7.9.5
>

Cheers,

Moritz
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v11 1/4] usage documentation for FPGA manager core

2015-09-22 Thread Moritz Fischer
Hi Alan,

On Tue, Sep 22, 2015 at 8:21 AM,   wrote:
> From: Alan Tull 
>
> Add a document on the new FPGA manager core.
>

Reviewed-by: Moritz Fischer 

> Signed-off-by: Alan Tull 
> ---
> v9:  initial version where this patch was added
>
> v10: requested cleanups to formatting and otherwise
>  s/fpga/FPGA/g
>  rewrite implementation section to not reference socfpga.c by name
>  other rewrites
>  Moved to Documentation/fpga/
>
> v11: s/with image/with an image/
>  s/on the path/in the path/
> ---
>  Documentation/fpga/fpga-mgr.txt |  171 
> +++
>  1 file changed, 171 insertions(+)
>  create mode 100644 Documentation/fpga/fpga-mgr.txt
>
> diff --git a/Documentation/fpga/fpga-mgr.txt b/Documentation/fpga/fpga-mgr.txt
> new file mode 100644
> index 000..ce3e84f
> --- /dev/null
> +++ b/Documentation/fpga/fpga-mgr.txt
> @@ -0,0 +1,171 @@
> +FPGA Manager Core
> +
> +Alan Tull 2015
> +
> +Overview
> +
> +
> +The FPGA manager core exports a set of functions for programming an FPGA with
> +an image.  The API is manufacturer agnostic.  All manufacturer specifics are
> +hidden away in a low level driver which registers a set of ops with the core.
> +The FPGA image data itself is very manufacturer specific, but for our 
> purposes
> +it's just binary data.  The FPGA manager core won't parse it.
> +
> +
> +API Functions:
> +==
> +
> +To program the FPGA from a file or from a buffer:
> +-
> +
> +   int fpga_mgr_buf_load(struct fpga_manager *mgr, u32 flags,
> + const char *buf, size_t count);
> +
> +Load the FPGA from an image which exists as a buffer in memory.
> +
> +   int fpga_mgr_firmware_load(struct fpga_manager *mgr, u32 flags,
> +  const char *image_name);
> +
> +Load the FPGA from an image which exists as a file.  The image file must be 
> on
> +the firmware search path (see the firmware class documentation).
> +
> +For both these functions, flags == 0 for normal full reconfiguration or
> +FPGA_MGR_PARTIAL_RECONFIG for partial reconfiguration.  If successful, the 
> FPGA
> +ends up in operating mode.  Return 0 on success or a negative error code.
> +
> +
> +To get/put a reference to a FPGA manager:
> +-
> +
> +   struct fpga_manager *of_fpga_mgr_get(struct device_node *node);
> +
> +   void fpga_mgr_put(struct fpga_manager *mgr);
> +
> +Given a DT node, get an exclusive reference to a FPGA manager or release
> +the reference.
> +
> +
> +To register or unregister the low level FPGA-specific driver:
> +-
> +
> +   int fpga_mgr_register(struct device *dev, const char *name,
> + const struct fpga_manager_ops *mops,
> + void *priv);
> +
> +   void fpga_mgr_unregister(struct device *dev);
> +
> +Use of these two functions is described below in "How To Support a new FPGA
> +device."
> +
> +
> +How to write an image buffer to a supported FPGA
> +
> +/* Include to get the API */
> +#include <linux/fpga/fpga-mgr.h>
> +
> +/* device node that specifies the FPGA manager to use */
> +struct device_node *mgr_node = ...
> +
> +/* FPGA image is in this buffer.  count is size of the buffer. */
> +char *buf = ...
> +int count = ...
> +
> +/* flags indicates whether to do full or partial reconfiguration */
> +int flags = 0;
> +
> +int ret;
> +
> +/* Get exclusive control of FPGA manager */
> +struct fpga_manager *mgr = of_fpga_mgr_get(mgr_node);
> +
> +/* Load the buffer to the FPGA */
> +ret = fpga_mgr_buf_load(mgr, flags, buf, count);
> +
> +/* Release the FPGA manager */
> +fpga_mgr_put(mgr);
> +
> +
> +How to write an image file to a supported FPGA
> +==
> +/* Include to get the API */
> +#include <linux/fpga/fpga-mgr.h>
> +
> +/* device node that specifies the FPGA manager to use */
> +struct device_node *mgr_node = ...
> +
> +/* FPGA image is in this file which is in the firmware search path */
> +const char *path = "fpga-image-9.rbf"
> +
> +/* flags indicates whether to do full or partial reconfiguration */
> +int flags = 0;
> +
> +int ret;
> +
> +/* Get exclusive control of FPGA manager */
> +struct fpga_manager *mgr = of_fpga_mgr_get(mgr_node);
> +
> +/* Get the firmware image (path) and load it to the FPGA */
> +ret = fpga_mgr_firmware_load(mgr, flags, path);
> +
> +/* Release the FPGA manager */
> +fpga_mgr_put(mgr);
> +
> +
> +How to support a new FPGA device
> +
> +To add another FPGA manager, write a driver that implements a set of ops.  
> The
> +probe function calls fpga_mgr_register(), such as:
> +
> +static const struct fpga_manager_ops socfpga_fpga_ops = {
> +   .write_init = socfpga_fpga_ops_configure_init,
> +   .write = socfpga_fpga_ops_configure_write,
> +   

Re: [RFC 0/3] sched/idle: run-time support for setting idle polling

2015-09-22 Thread Rafael J. Wysocki
On Tuesday, September 22, 2015 04:34:19 PM Luiz Capitulino wrote:
> Hi,

Hi,

Please always CC patches related to power management to 
linux...@vger.kernel.org.

Also CCing Len Brown who's the maintainer of the intel_idle driver and Peter Z.

> Some archs allow the system administrator to set the
> idle thread behavior to spin instead of entering
> sleep states. The x86 arch, for example, has an idle=
> command-line parameter for this purpose.
> 
> However, the command-line parameter has two problems:
> 
>  1. You have to reboot if you change your mind
>  2. This setting affects all system cores
> 
> The second point is relevant for systems where cores
> are partitioned into bookkeeping and low-latency cores.
> Usually, it's OK for bookkeeping cores to enter deeper
> sleep states. It's only the low-latency cores that should
> poll when entering idle.

This looks like a use case for PM QoS to me rather.  You'd need to make it
work per-CPU rather than globally, but that really is about asking for
minimum latency.

> This series adds the following file:
> 
>  /sys/devices/system/cpu/cpu_idle
> 
> This file outputs and stores a cpumask of the cores
> which will have idle polling behavior.

I don't like this interface at all.

You have a cpuidle directory per core already, so what's the reason to add an
extra mask file really? 

> This implementation seems to work fine on x86, however
> it's RFC because of the following points (for which
> feedback is greatly appreciated):
> 
>  o I believe this implementation should work for all archs,
>but I can't confirm it as my machines and experience is
>limited to x86
> 
>  o Some x86 cpufreq drivers explicitly check if idle=poll
>was passed. Does anyone know if this is an optimization
>or is there actually a conflict between idle=poll and
>driver operation?

idle=poll is used as a workaround for platform defects on some systems IIRC.

>  o This series maintains cpu_idle_poll_ctrl() semantics
>which led to a more complex implementation. That is, today
>cpu_idle_poll_ctrl() increments or decrements a counter.
>A lot of arch code seems to count on this semantic, where
>cpu_idle_poll_ctrl(enable or false) calls have to match to
>enable or disable idle polling
> 
> Luiz Capitulino (3):
>   sched/idle: cpu_idle_poll(): drop unused return code
>   sched/idle: make cpu_idle_force_poll per-cpu
>   sched/idle: run-time support for setting idle polling
> 
>  drivers/base/cpu.c  | 44 
>  include/linux/cpu.h |  2 ++
>  kernel/sched/idle.c | 96 
> +
>  3 files changed, 129 insertions(+), 13 deletions(-)

Thanks,
Rafael

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH] memcg: make mem_cgroup_read_stat() unsigned

2015-09-22 Thread Greg Thelen
Andrew Morton wrote:

> On Tue, 22 Sep 2015 15:16:32 -0700 Greg Thelen  wrote:
>
>> mem_cgroup_read_stat() returns a page count by summing per cpu page
>> counters.  The summing is racy wrt. updates, so a transient negative sum
>> is possible.  Callers don't want negative values:
>> - mem_cgroup_wb_stats() doesn't want negative nr_dirty or nr_writeback.
>> - oom reports and memory.stat shouldn't show confusing negative usage.
>> - tree_usage() already avoids negatives.
>>
>> Avoid returning negative page counts from mem_cgroup_read_stat() and
>> convert it to unsigned.
>
> Someone please remind me why this code doesn't use the existing
> percpu_counter library which solved this problem years ago.
>
>>   for_each_possible_cpu(cpu)
>
> and which doesn't iterate across offlined CPUs.

I found [1] and [2] discussing memory layout differences between:
a) existing memcg hand rolled per cpu arrays of counters
vs
b) array of generic percpu_counter
The current approach was claimed to have lower memory overhead and
better cache behavior.

I assume it's pretty straightforward to create generic
percpu_counter_array routines which memcg could use.  Possibly something
like this could be made general enough to satisfy vmstat, but that is
less clear.

[1] http://www.spinics.net/lists/cgroups/msg06216.html
[2] https://lkml.org/lkml/2014/9/11/1057
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH] net: mdio-octeon: Add PCI driver binding.

2015-09-22 Thread David Daney
From: David Daney 

When the Cavium mdio-octeon devices appear in the Thunder family of
arm64 based SoCs, they show up as PCI devices.  Add PCI driver
wrapping so the driver is bound in the standard PCI device scan.

When in this form, a single PCI device may have more than a single
bus, we call this a "nexus" of buses.  The standard firmware
device_for_each_child_node() iterator is used to find the individual
buses underneath the "nexus".

Update the device tree binding documentation for the new PCI driver
binding.

Signed-off-by: David Daney 
---
 .../devicetree/bindings/net/cavium-mdio.txt|  61 +++-
 drivers/net/phy/mdio-octeon.c  | 159 +++--
 2 files changed, 209 insertions(+), 11 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/cavium-mdio.txt 
b/Documentation/devicetree/bindings/net/cavium-mdio.txt
index 04cb749..020df08 100644
--- a/Documentation/devicetree/bindings/net/cavium-mdio.txt
+++ b/Documentation/devicetree/bindings/net/cavium-mdio.txt
@@ -1,9 +1,12 @@
 * System Management Interface (SMI) / MDIO
 
 Properties:
-- compatible: "cavium,octeon-3860-mdio"
+- compatible: One of:
 
-  Compatibility with all cn3XXX, cn5XXX and cn6XXX SOCs.
+   "cavium,octeon-3860-mdio": Compatibility with all cn3XXX, cn5XXX
+   and cn6XXX SOCs.
+
+   "cavium,thunder-8890-mdio": Compatibility with all cn8XXX SOCs.
 
 - reg: The base address of the MDIO bus controller register bank.
 
@@ -25,3 +28,57 @@ Example:
reg = <0>;
};
};
+
+
+* System Management Interface (SMI) / MDIO Nexus
+
+  Several mdio buses may be gathered as children of a single PCI
+  device, this PCI device is the nexus of the buses.
+
+Properties:
+
+- compatible: "cavium,thunder-8890-mdio-nexus";
+
+- reg: The PCI device and function numbers of the nexus device.
+
+- #address-cells: Must be <2>.
+
+- #size-cells: Must be <2>.
+
+- ranges: As needed for mapping of the MDIO bus device registers.
+
+- assigned-addresses: As needed for mapping of the MDIO bus device registers.
+
+Example:
+
+mdio-nexus@1,3 {
+compatible = "cavium,thunder-8890-mdio-nexus";
+#address-cells = <2>;
+#size-cells = <2>;
+reg = <0x0b00 0 0 0 0>; /* DEVFN = 0x0b (1:3) */
+assigned-addresses = <0x0300 0x87e0 0x0500 0x0 
0x80>;
+ranges = <0x87e0 0x0500 0x0300 0x87e0 0x0500 0x0 
0x80>;
+
+mdio0@87e0,05003800 {
+compatible = "cavium,thunder-8890-mdio";
+#address-cells = <1>;
+#size-cells = <0>;
+reg = <0x87e0 0x05003800 0x0 0x30>;
+
+ethernet-phy@0 {
+...
+reg = <0>;
+};
+};
+mdio0@87e0,05003880 {
+compatible = "cavium,thunder-8890-mdio";
+#address-cells = <1>;
+#size-cells = <0>;
+reg = <0x87e0 0x05003880 0x0 0x30>;
+
+ethernet-phy@0 {
+...
+reg = <0>;
+};
+};
+};
diff --git a/drivers/net/phy/mdio-octeon.c b/drivers/net/phy/mdio-octeon.c
index fcf4e4d..21eca35 100644
--- a/drivers/net/phy/mdio-octeon.c
+++ b/drivers/net/phy/mdio-octeon.c
@@ -3,7 +3,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  *
- * Copyright (C) 2009-2012 Cavium, Inc.
+ * Copyright (C) 2009-2015 Cavium, Inc.
  */
 
 #include 
@@ -14,6 +14,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #ifdef CONFIG_CAVIUM_OCTEON_SOC
 #include 
@@ -110,8 +111,6 @@ enum octeon_mdiobus_mode {
 struct octeon_mdiobus {
struct mii_bus *mii_bus;
u64 register_base;
-   resource_size_t mdio_phys;
-   resource_size_t regsize;
enum octeon_mdiobus_mode mode;
int phy_irq[PHY_MAX_ADDR];
 };
@@ -269,6 +268,8 @@ static int octeon_mdiobus_probe(struct platform_device 
*pdev)
 {
struct octeon_mdiobus *bus;
struct resource *res_mem;
+   resource_size_t mdio_phys;
+   resource_size_t regsize;
union cvmx_smix_en smi_en;
int err = -ENOENT;
 
@@ -282,17 +283,17 @@ static int octeon_mdiobus_probe(struct platform_device 
*pdev)
return -ENXIO;
}
 
-   bus->mdio_phys = res_mem->start;
-   bus->regsize = resource_size(res_mem);
+   mdio_phys = res_mem->start;
+   regsize = resource_size(res_mem);
 
-   if (!devm_request_mem_region(>dev, bus->mdio_phys, bus->regsize,
+   if (!devm_request_mem_region(>dev, mdio_phys, regsize,
 res_mem->name)) {
dev_err(>dev, "request_mem_region 

Re: [PATCH] net: dsa: Fix Marvell Egress Trailer check

2015-09-22 Thread David Miller
From: Neil Armstrong 
Date: Tue, 22 Sep 2015 11:28:14 +0200

> The Marvell Egress rx trailer check must be fixed to
> correctly detect bad bits in the third byte of the
> Egress trailer as described in the Table 28 of the
> 88E6060 datasheet.
> The current code incorrectly omits to check the third
> byte and checks the fourth byte twice.
> 
> Signed-off-by: Neil Armstrong 

Applied, thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2] lib: fix data race in rhashtable_rehash_one

2015-09-22 Thread David Miller
From: Dmitry Vyukov 
Date: Tue, 22 Sep 2015 10:51:52 +0200

> rhashtable_rehash_one() uses complex logic to update entry->next field,
> after INIT_RHT_NULLS_HEAD and NULLS_MARKER expansion:
> 
> entry->next = 1 | ((base + off) << 1)
> 
> This can be compiled along the lines of:
> 
> entry->next = base + off
> entry->next <<= 1
> entry->next |= 1
> 
> Which will break concurrent readers.
> 
> NULLS value recomputation is not needed here, so just remove
> the complex logic.
> 
> The data race was found with KernelThreadSanitizer (KTSAN).
> 
> Signed-off-by: Dmitry Vyukov 

Applied, thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v3 2/4] Documentation: bindings: mfd: cros ec: document vbc EC property

2015-09-22 Thread Lee Jones
On Mon, 21 Sep 2015, Emilio López wrote:

> Some EC implementations include a small nvram space used to store
> verified boot context data. This boolean property lets us indicate
> whether this space is available or not on a specific EC implementation.
> 
> Signed-off-by: Emilio López 
> ---
> 
> Patch is new in v3, split from 3/4
> 
>  Documentation/devicetree/bindings/mfd/cros-ec.txt | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/Documentation/devicetree/bindings/mfd/cros-ec.txt 
> b/Documentation/devicetree/bindings/mfd/cros-ec.txt
> index 1777916..136e0c2 100644
> --- a/Documentation/devicetree/bindings/mfd/cros-ec.txt
> +++ b/Documentation/devicetree/bindings/mfd/cros-ec.txt
> @@ -34,6 +34,10 @@ Required properties (LPC):
>  - compatible: "google,cros-ec-lpc"
>  - reg: List of (IO address, size) pairs defining the interface uses
>  
> +Optional properties (all):
> +- google,has-vbc-nvram: Some implementations of the EC include a small
> +  nvram space used to store verified boot context data. This boolean flag
> +  is used to specify whether this nvram is present or not.

Is there no way to check for this at runtime?

>  Example for I2C:
>  

-- 
Lee Jones
Linaro STMicroelectronics Landing Team Lead
Linaro.org │ Open source software for ARM SoCs
Follow Linaro: Facebook | Twitter | Blog
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v10 1/4] clk: clk-vf610: Add clock for Vybrid OCOTP controller

2015-09-22 Thread Shawn Guo
On Mon, Sep 07, 2015 at 01:51:35PM +0530, Sanchayan Maity wrote:
> Add clock support for Vybrid On-Chip One Time Programmable
> (OCOTP) controller.
> 
> While the OCOTP block does not require explicit clock gating,
> for programming the OCOTP timing register the clock rate of
> ipg clock is required for timing calculations related to fuse
> and shadow register read sequence. We explicitly specify the
> ipg clock for OCOTP as a result.
> 
> Signed-off-by: Sanchayan Maity 

Applied this one, thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH v2] mfd: lpc_ich: Separate device cells for clarity

2015-09-22 Thread Lee Jones
On Tue, 22 Sep 2015, Aaron Sierra wrote:

> The lpc_ich_cells array gives the wrong impression about the
> relationship between the watchdog and GPIO devices. They are
> completely distinct devices, so this patch separates the
> array into distinct mfd_cell structs per device.
> 
> A side effect of removing the array, is that the lpc_cells enum
> is no longer needed.
> 
> Signed-off-by: Aaron Sierra 
> ---
>  v2 - rebase onto 4.3-rc2
> 
>  drivers/mfd/lpc_ich.c | 42 ++
>  1 file changed, 18 insertions(+), 24 deletions(-)

Looks like you didn't apply Andy's Ack!

Applied with Andy's Ack.

> diff --git a/drivers/mfd/lpc_ich.c b/drivers/mfd/lpc_ich.c
> index c5a9a08..b514f3c 100644
> --- a/drivers/mfd/lpc_ich.c
> +++ b/drivers/mfd/lpc_ich.c
> @@ -132,24 +132,18 @@ static struct resource gpio_ich_res[] = {
>   },
>  };
>  
> -enum lpc_cells {
> - LPC_WDT = 0,
> - LPC_GPIO,
> +static struct mfd_cell lpc_ich_wdt_cell = {
> + .name = "iTCO_wdt",
> + .num_resources = ARRAY_SIZE(wdt_ich_res),
> + .resources = wdt_ich_res,
> + .ignore_resource_conflicts = true,
>  };
>  
> -static struct mfd_cell lpc_ich_cells[] = {
> - [LPC_WDT] = {
> - .name = "iTCO_wdt",
> - .num_resources = ARRAY_SIZE(wdt_ich_res),
> - .resources = wdt_ich_res,
> - .ignore_resource_conflicts = true,
> - },
> - [LPC_GPIO] = {
> - .name = "gpio_ich",
> - .num_resources = ARRAY_SIZE(gpio_ich_res),
> - .resources = gpio_ich_res,
> - .ignore_resource_conflicts = true,
> - },
> +static struct mfd_cell lpc_ich_gpio_cell = {
> + .name = "gpio_ich",
> + .num_resources = ARRAY_SIZE(gpio_ich_res),
> + .resources = gpio_ich_res,
> + .ignore_resource_conflicts = true,
>  };
>  
>  /* chipset related info */
> @@ -841,7 +835,7 @@ static int lpc_ich_finalize_wdt_cell(struct pci_dev *dev)
>   struct itco_wdt_platform_data *pdata;
>   struct lpc_ich_priv *priv = pci_get_drvdata(dev);
>   struct lpc_ich_info *info;
> - struct mfd_cell *cell = &lpc_ich_cells[LPC_WDT];
> + struct mfd_cell *cell = &lpc_ich_wdt_cell;
>  
>   pdata = devm_kzalloc(&dev->dev, sizeof(*pdata), GFP_KERNEL);
>   if (!pdata)
> @@ -860,7 +854,7 @@ static int lpc_ich_finalize_wdt_cell(struct pci_dev *dev)
>  static void lpc_ich_finalize_gpio_cell(struct pci_dev *dev)
>  {
>   struct lpc_ich_priv *priv = pci_get_drvdata(dev);
> - struct mfd_cell *cell = &lpc_ich_cells[LPC_GPIO];
> + struct mfd_cell *cell = &lpc_ich_gpio_cell;
>  
>   cell->platform_data = &lpc_chipset_info[priv->chipset];
>   cell->pdata_size = sizeof(struct lpc_ich_info);
> @@ -904,7 +898,7 @@ static int lpc_ich_init_gpio(struct pci_dev *dev)
>   base_addr = base_addr_cfg & 0xff80;
>   if (!base_addr) {
>   dev_notice(&dev->dev, "I/O space for ACPI uninitialized\n");
> - lpc_ich_cells[LPC_GPIO].num_resources--;
> + lpc_ich_gpio_cell.num_resources--;
>   goto gpe0_done;
>   }
>  
> @@ -918,7 +912,7 @@ static int lpc_ich_init_gpio(struct pci_dev *dev)
>* the platform_device subsystem doesn't see this resource
>* or it will register an invalid region.
>*/
> - lpc_ich_cells[LPC_GPIO].num_resources--;
> + lpc_ich_gpio_cell.num_resources--;
>   acpi_conflict = true;
>   } else {
>   lpc_ich_enable_acpi_space(dev);
> @@ -958,12 +952,12 @@ gpe0_done:
>  
>   lpc_ich_finalize_gpio_cell(dev);
>   ret = mfd_add_devices(&dev->dev, PLATFORM_DEVID_AUTO,
> -   &lpc_ich_cells[LPC_GPIO], 1, NULL, 0, NULL);
> +   &lpc_ich_gpio_cell, 1, NULL, 0, NULL);
>  
>  gpio_done:
>   if (acpi_conflict)
>   pr_warn("Resource conflict(s) found affecting %s\n",
> - lpc_ich_cells[LPC_GPIO].name);
> + lpc_ich_gpio_cell.name);
>   return ret;
>  }
>  
> @@ -1007,7 +1001,7 @@ static int lpc_ich_init_wdt(struct pci_dev *dev)
>*/
>   if (lpc_chipset_info[priv->chipset].iTCO_version == 1) {
>   /* Don't register iomem for TCO ver 1 */
> - lpc_ich_cells[LPC_WDT].num_resources--;
> + lpc_ich_wdt_cell.num_resources--;
>   } else if (lpc_chipset_info[priv->chipset].iTCO_version == 2) {
>   pci_read_config_dword(dev, RCBABASE, &base_addr_cfg);
>   base_addr = base_addr_cfg & 0xc000;
> @@ -1035,7 +1029,7 @@ static int lpc_ich_init_wdt(struct pci_dev *dev)
>   goto wdt_done;
>  
>   ret = mfd_add_devices(&dev->dev, PLATFORM_DEVID_AUTO,
> -   &lpc_ich_cells[LPC_WDT], 1, NULL, 0, NULL);
> +   &lpc_ich_wdt_cell, 1, NULL, 0, NULL);
>  
>  wdt_done:
>   return ret;

-- 
Lee Jones
Linaro STMicroelectronics Landing Team Lead
Linaro.org │ Open source software for ARM SoCs

  1   2   3   4   5   6   7   8   9   10   >