Re: mailman From rewriting [was perf jit: genelf makes assumptions about endian]

2016-03-29 Thread Jeremy Kerr
Hi Stephen,

>> Do you know why mailman would be re-writing From: there? It's confusing
>> patchwork, as multiple mails are now coming from that address.
> 
> Yep, Anton posts from samba.org.  They publish a DMARC policy that
> breaks mailing lists.

(╯°□°)╯︵ ┻━━┻

This also breaks git-am:

  [jk@pudge linux]$ git am incoming.eml
  Applying: perf jit: genelf makes assumptions about endian
  [jk@pudge linux]$ git log --format='format:%an <%ae>' -1
  Anton Blanchard via Linuxppc-dev 

> The best thing we can do is to do the above rewrite of the From header.

OK, it looks like we're stuck either way with DMARC. Could we make this
a little more tolerable by stashing the original From: value in a new
header? I know it's already in Reply-To, but that could also be set by
arbitrary other (non-mailman-DMARC-rewrite) sources.

Alternatively, if there's some other way to tell that a mail has been
rewritten, we can know to use Reply-To in preference to From.

Otherwise, I guess we could require that *all patch submitters* put
their From: line in the content of their mails, as git send-email does
when user != author. But that's a little less-than-optimal.

Cheers,


Jeremy

Re: [PATCH 2/2] powerpc/pagetable: Add option to dump kernel hashpagetable

2016-03-29 Thread Balbir Singh


On 22/03/16 09:04, Rashmica Gupta wrote:
> Useful to be able to dump the kernel hash page table to check
> which pages are hashed along with their sizes and other details.
>
> Add a debugfs file to check the page tables. To use this the PPC_PTDUMP
> config option must be selected.
>
> Signed-off-by: Rashmica Gupta 
> ---
>  arch/powerpc/mm/Makefile |   3 +-
>  arch/powerpc/mm/dump_hashpagetable.c | 488 
> +++
>  2 files changed, 490 insertions(+), 1 deletion(-)
>  create mode 100644 arch/powerpc/mm/dump_hashpagetable.c
>
> diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
> index 6935c6204fbc..0bd48b345ff3 100644
> --- a/arch/powerpc/mm/Makefile
> +++ b/arch/powerpc/mm/Makefile
> @@ -41,4 +41,5 @@ obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
>  obj-$(CONFIG_HIGHMEM)+= highmem.o
>  obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o
>  obj-$(CONFIG_SPAPR_TCE_IOMMU)+= mmu_context_iommu.o
> -obj-$(CONFIG_PPC_PTDUMP) += dump_linuxpagetables.o
> +obj-$(CONFIG_PPC_PTDUMP) += dump_linuxpagetables.o \
> +dump_hashpagetable.o
> diff --git a/arch/powerpc/mm/dump_hashpagetable.c 
> b/arch/powerpc/mm/dump_hashpagetable.c
> new file mode 100644
> index ..044fd13e4d07
> --- /dev/null
> +++ b/arch/powerpc/mm/dump_hashpagetable.c
> @@ -0,0 +1,488 @@
> +/*
> + * Copyright 2016, Rashmica Gupta, IBM Corp.
> + *
> + * This traverses the kernel virtual memory and dumps the pages that are in
> + * the hash pagetable, along with their flags to
> + * /sys/kernel/debug/kernel_hash_pagetable.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; version 2
> + * of the License.
> + */
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +struct addr_marker {
> + unsigned long start_address;
> + const char *name;
> +};
> +
> +static struct addr_marker address_markers[] = {
> + { PAGE_OFFSET,  "Start of kernel VM"},
> + { VMALLOC_START,"vmalloc() Area" },
> + { VMALLOC_END,  "vmalloc() End" },
> + { ISA_IO_BASE,  "isa I/O start" },
> + { ISA_IO_END,   "isa I/O end" },
> + { PHB_IO_BASE,  "phb I/O start" },
> + { PHB_IO_END,   "phb I/O end" },
> + { IOREMAP_BASE, "I/O remap start" },
> + { IOREMAP_END,  "I/O remap end" },
> + { VMEMMAP_BASE, "vmemmap start" },
> + { -1,   NULL },
> +};
> +
> +struct pg_state {
> + struct seq_file *seq;
> + const struct addr_marker *marker;
> + unsigned long start_address;
> + unsigned level;
> + u64 current_flags;
> +};
> +
> +struct flag_info {
> + u64 mask;
> + u64 val;
> + const char  *set;
> + const char  *clear;
> + boolis_val;
> +};
> +
> +static const struct flag_info v_flag_array[] = {
> + {
> + .mask   = SLB_VSID_B,
> + .val= SLB_VSID_B_256M,
> + .set= "ssize: 256M",
> + .clear  = "ssize: 1T  ",
> + }, {
> + .mask   = HPTE_V_SECONDARY,
> + .val= HPTE_V_SECONDARY,
> + .set= "secondary",
> + .clear  = "primary  ",
> + }, {
> + .mask   = HPTE_V_VALID,
> + .val= HPTE_V_VALID,
> + .set= "valid  ",
> + .clear  = "invalid",
> + }, {
> + .mask   = HPTE_V_BOLTED,
> + .val= HPTE_V_BOLTED,
> + .set= "bolted",
> + .clear  = "",
> + }
> +};
> +
> +static const struct flag_info r_flag_array[] = {
> + {
> + .mask   = HPTE_R_PP0 | HPTE_R_PP,
> + .val= HPTE_R_PP0 | HPTE_R_PP,
> + .set= "prot",
> + .clear  = "",
> + .is_val = true,
> + }, {
> + .mask   = HPTE_R_KEY_HI | HPTE_R_KEY_LO,
> + .val= HPTE_R_KEY_HI | HPTE_R_KEY_LO,
> + .set= "key",
> + .clear  = "",
> + .is_val = true,
> + }, {
> + .mask   = HPTE_R_R,
> + .val= HPTE_R_R,
> + .set= "ref",
> + .clear  = "   ",
> + }, {
> + .mask   = HPTE_R_C,
> + .val= HPTE_R_C,
> + .set= "changed",
> + .clear  = "   ",
> + }, {
> + .mask   = HPTE_R_N,
> + .val= HPTE_R_N,
> + .set= "no execute",
> + .clear  = "",
> + }, {
> + .mask   = HPTE_R_WIMG,
> + .val= HPTE_R_W,
> + .set= "writethru",
> + 

Re: [PATCH 1/2] powerpc/pagetable: Add option to dump the linux pagetables

2016-03-29 Thread Balbir Singh


On 22/03/16 09:04, Rashmica Gupta wrote:
> Useful to be able to dump the kernel's page tables to check permissions
> and memory types - derived from arm64's implementation.
>
> Add a debugfs file to check the page tables. To use this the PPC_PTDUMP
> config option must be selected.
>
> Signed-off-by: Rashmica Gupta 
> ---
>  arch/powerpc/Kconfig.debug |  12 ++
>  arch/powerpc/mm/Makefile   |   1 +
>  arch/powerpc/mm/dump_linuxpagetables.c | 377 
> +
>  3 files changed, 390 insertions(+)
>  create mode 100644 arch/powerpc/mm/dump_linuxpagetables.c
>
> diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug
> index 638f9ce740f5..26a60effea1a 100644
> --- a/arch/powerpc/Kconfig.debug
> +++ b/arch/powerpc/Kconfig.debug
> @@ -344,4 +344,16 @@ config FAIL_IOMMU
>  
> If you are unsure, say N.
>  
> +config PPC_PTDUMP
> +bool "Export kernel pagetable layout to userspace via debugfs"
> +depends on DEBUG_KERNEL
> +select DEBUG_FS
> +help
> +   This option exports the state of the kernel pagetables to a
> +   debugfs file. This is only useful for kernel developers who are
> +   working in architecture specific areas of the kernel - probably
> +   not a good idea to enable this feature in a production kernel.
> +
> +   If you are unsure, say N.
> +

Some minor comments below, but otherwise

Acked-by: Balbir Singh 

>  endmenu
> diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
> index adfee3f1aeb9..6935c6204fbc 100644
> --- a/arch/powerpc/mm/Makefile
> +++ b/arch/powerpc/mm/Makefile
> @@ -41,3 +41,4 @@ obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
>  obj-$(CONFIG_HIGHMEM)+= highmem.o
>  obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o
>  obj-$(CONFIG_SPAPR_TCE_IOMMU)+= mmu_context_iommu.o
> +obj-$(CONFIG_PPC_PTDUMP) += dump_linuxpagetables.o
> diff --git a/arch/powerpc/mm/dump_linuxpagetables.c 
> b/arch/powerpc/mm/dump_linuxpagetables.c
> new file mode 100644
> index ..f97fbfdac4b9
> --- /dev/null
> +++ b/arch/powerpc/mm/dump_linuxpagetables.c
> @@ -0,0 +1,377 @@
> +/*
> + * Copyright 2016, Rashmica Gupta, IBM Corp.
> + *
> + * This traverses the kernel pagetables and dumps the
> + * information about the used sections of memory to
> + * /sys/kernel/debug/kernel_pagetables.
> + *
> + * Derived from the arm64 implementation:
> + * Copyright (c) 2014, The Linux Foundation, Laura Abbott.
> + * (C) Copyright 2008 Intel Corporation, Arjan van de Ven.
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License
> + * as published by the Free Software Foundation; version 2
> + * of the License.
> + */
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +struct addr_marker {
> + unsigned long start_address;
> + const char *name;
> +};
> +
> +static struct addr_marker address_markers[] = {
> + { VMALLOC_START,"vmalloc() Area" },
> + { VMALLOC_END,  "vmalloc() End" },
> + { ISA_IO_BASE,  "isa I/O start" },
> + { ISA_IO_END,   "isa I/O end" },
> + { PHB_IO_BASE,  "phb I/O start" },
> + { PHB_IO_END,   "phb I/O end" },
> + { IOREMAP_BASE, "I/O remap start" },
> + { IOREMAP_END,  "I/O remap end" },
> + { -1,   NULL },
> +};
> +
> +/*
> + * To visualise what is happening,
> + *
> + *  - PTRS_PER_P** = how many entries there are in the corresponding P**
> + *  - P**_SHIFT = how many bits of the address we use to index into the
> + * corresponding P**
> + *  - P**_SIZE is how much memory we can access through the table - not the
> + * size of the table itself.
> + * P**={PGD, PUD, PMD, PTE}
> + *
> + *
> + * Each entry of the PGD points to a PUD. Each entry of a PUD points to a
> + * PMD. Each entry of a PMD points to a PTE. And every PTE entry points to
> + * a page.
> + *
> + * In the case where there are only 3 levels, the PUD is folded into the
> + * PGD: every PUD has only one entry which points to the PMD.
> + *
> + * The page dumper groups page table entries of the same type into a single
> + * description. It uses pg_state to track the range information while
> + * iterating over the PTE entries. When the continuity is broken it then
> + * dumps out a description of the range - ie PTEs that are virtually 
> contiguous
> + * with the same PTE flags are chunked together. This is to make it clear how
> + * different areas of the kernel virtual memory are used.
> + *
> + */
> +struct pg_state {
> + struct seq_file *seq;
> + const struct addr_marker *marker;
> + unsigned long start_address;
> + unsigned level;
> + u64 current_flags;
> +};
> +
> +struct flag_info {
> + u64   

Re: mailman From rewriting [was perf jit: genelf makes assumptions about endian]

2016-03-29 Thread Michael Ellerman
On Tue, 2016-03-29 at 21:06 +1100, Stephen Rothwell wrote:
> Hi Jeremy,
> 
> On Tue, 29 Mar 2016 16:56:58 +0800 Jeremy Kerr  wrote:
> > 
> > From the mail that Anton has sent to linuxppc-dev:
> > 
> > > From: Anton Blanchard via Linuxppc-dev 
> > > Reply-To: Anton Blanchard   
> > 
> > Do you know why mailman would be re-writing From: there? It's confusing
> > patchwork, as multiple mails are now coming from that address.
> 
> Yep, Anton posts from samba.org.  They publish a DMARC policy that
> breaks mailing lists.  The best thing we can do is to do the above
> rewrite of the From header.
> 
> The alternative is that all our Yahoo, Gmail and Hotmail subscribers
> (at least) would bounce the email.
> 
> So this rewriting will happen for any From: line that has an address
> that has such a DMARC policy.  We have had posts from Yahoo subscribers
> bounced in the past.

[adding patchwork list to cc]

So it sounds like this is just something we're going to have to live with.

It's causing problems on patchwork because patchwork only uses the email
address to look up a user. This is causing patches to be misattributed on
ozlabs.org, eg:

  http://patchwork.ozlabs.org/project/linuxppc-dev/list/?submitter=68366=*

Paul was the first person to have a From header rewritten, eg to:

  From: Paul Mackerras via Linuxppc-dev 

And so in the patchwork database the user "linuxppc-dev@lists.ozlabs.org" has a
name of "Paul Mackerras via Linuxppc-dev".

The subsequent two patches (ppc4xx-rng and perf jit) are from different people,
who also had their From address rewritten due to DMARC.

So I think patchwork needs to:
 - detect mails that have been rewritten due to DMARC
   - AFAICS the only way to do this is to notice that the From address is the
 same as the list address.
 - when it sees those mails, use the Reply-To header instead.
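
A rough sketch of that heuristic (illustrative C only; patchwork itself is
Python, and the function and parameter names here are made up):

	#include <string.h>

	/* Pick the address to attribute a patch to. */
	static const char *submitter_addr(const char *from, const char *reply_to,
					  const char *list_addr)
	{
		/* From: rewritten to the list itself => DMARC munging */
		if (reply_to && strcmp(from, list_addr) == 0)
			return reply_to;
		return from;
	}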

cheers


Re: [PATCH] perf jit: genelf makes assumptions about endian

2016-03-29 Thread Michael Ellerman
On Tue, 2016-03-29 at 17:59 +1100, Anton Blanchard wrote:

> Commit 9b07e27f88b9 ("perf inject: Add jitdump mmap injection support")
> incorrectly assumed that PowerPC is big endian only.
> 
> Simplify things by consolidating the define of GEN_ELF_ENDIAN and checking
> for __BYTE_ORDER == __BIG_ENDIAN.
> 
> The PowerPC checks were also incorrect, they do not match what gcc
> emits. We should first look for __powerpc64__, then __powerpc__.
> 
> Fixes: 9b07e27f88b9 ("perf inject: Add jitdump mmap injection support")
> Signed-off-by: Anton Blanchard 

The diff's a little hard to read because you're pulling the endian logic out;
if I remove that I get something like:

>  #elif defined(__i386__)
>  #define GEN_ELF_ARCH EM_386
>  #define GEN_ELF_CLASS ELFCLASS32
> -#elif defined(__ppcle__)
> -#define GEN_ELF_ARCH EM_PPC
> -#define GEN_ELF_CLASS ELFCLASS64
> -#elif defined(__powerpc__)
> -#define GEN_ELF_ARCH EM_PPC64
> -#define GEN_ELF_CLASS ELFCLASS64
> -#elif defined(__powerpcle__)
> +#elif defined(__powerpc64__)
>  #define GEN_ELF_ARCH EM_PPC64
>  #define GEN_ELF_CLASS ELFCLASS64
> +#elif defined(__powerpc__)
> +#define GEN_ELF_ARCH EM_PPC
> +#define GEN_ELF_CLASS ELFCLASS32
>  #else
>  #error "unsupported architecture"
>  #endif

Which looks correct to me.
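
For reference, the consolidated endian check amounts to roughly this, a
sketch assuming the usual <endian.h>/<elf.h> macros rather than the exact
hunk from Anton's patch:

	#include <endian.h>
	#include <elf.h>

	#if __BYTE_ORDER == __BIG_ENDIAN
	#define GEN_ELF_ENDIAN	ELFDATA2MSB
	#else
	#define GEN_ELF_ENDIAN	ELFDATA2LSB
	#endif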

And the consolidation of the endian logic is "obviously correct", so:

Acked-by: Michael Ellerman 

cheers


Re: [PATCH 65/65] powerpc/mm/radix: Cputable update for radix

2016-03-29 Thread Michael Neuling
On Sun, 2016-03-27 at 13:54 +0530, Aneesh Kumar K.V wrote:
> This patch move the existing p9 hash to a different PVR and add
> radix feature with p9 PVR. That implies we will not be able to
> runtime select P9 hash. With P9 Radix we need to do
> 
> * set UPRT = 0 in cpu setup
> * set different TLB set count
> 
> We ideally want to use ibm,pa-features to enable disable radix. But
> we have already done setup cpu by the time we reach pa-features check.

What would we need to change to disable radix afterwards?  Can't we just update
the cur_cpu_spec if we hit this and change some LPCR bits?

> So for now use this hack.
> 
> Not-Signed-off-by: Aneesh Kumar K.V 

Good, indeed this is a pretty gross hack :-)

Why not change the existing cputable entry to do radix and let's
forget about hash?

Mikey

> ---
>  arch/powerpc/include/asm/book3s/64/mmu-hash.h |  1 +
>  arch/powerpc/include/asm/reg.h|  4 +++
>  arch/powerpc/kernel/cpu_setup_power.S | 35 
> +++
>  arch/powerpc/kernel/cputable.c| 29 +++---
>  arch/powerpc/kernel/mce_power.c   |  4 +++
>  5 files changed, 70 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h 
> b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> index 7da61b85406b..290157e8d5b2 100644
> --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
> @@ -119,6 +119,7 @@
>  #define POWER7_TLB_SETS  128 /* # sets in POWER7 TLB */
>  #define POWER8_TLB_SETS  512 /* # sets in POWER8 TLB */
>  #define POWER9_TLB_SETS_HASH 256 /* # sets in POWER9 TLB Hash mode */
> +#define POWER9_TLB_SETS_RADIX 128 /* # sets in POWER9 TLB Radix mode */
>  
>  #ifndef __ASSEMBLY__
>  
> diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
> index 257251ada3a3..d39b5fedabfd 100644
> --- a/arch/powerpc/include/asm/reg.h
> +++ b/arch/powerpc/include/asm/reg.h
> @@ -347,6 +347,10 @@
>  #define   LPCR_LPES_SH   2
>  #define   LPCR_RMI 0x0002  /* real mode is cache inhibit */
>  #define   LPCR_HDICE   0x0001  /* Hyp Decr enable (HV,PR,EE) */
> +/*
> + * Used in asm code, hence we don't want to use PPC_BITCOUNT
> + */
> +#define LPCR_UPRT (ASM_CONST(0x1) << (64 - 42))
>  #ifndef SPRN_LPID
>  #define SPRN_LPID0x13F   /* Logical Partition Identifier */
>  #endif
> diff --git a/arch/powerpc/kernel/cpu_setup_power.S 
> b/arch/powerpc/kernel/cpu_setup_power.S
> index 584e119fa8b0..e9b76c651bd1 100644
> --- a/arch/powerpc/kernel/cpu_setup_power.S
> +++ b/arch/powerpc/kernel/cpu_setup_power.S
> @@ -117,6 +117,41 @@ _GLOBAL(__restore_cpu_power9)
>   mtlr    r11
>   blr
>  
> +_GLOBAL(__setup_cpu_power9_uprt)
> + mflr    r11
> + bl  __init_FSCR
> + bl  __init_hvmode_206
> + mtlr    r11
> + beqlr
> + li  r0,0
> + mtspr   SPRN_LPID,r0
> + mfspr   r3,SPRN_LPCR
> + ori r3, r3, LPCR_PECEDH
> + oris    r3,r3,(LPCR_UPRT >> 16)
> + bl  __init_LPCR
> + bl  __init_HFSCR
> + bl  __init_tlb_power7
> + mtlr    r11
> + blr
> +
> +_GLOBAL(__restore_cpu_power9_uprt)
> + mflr    r11
> + bl  __init_FSCR
> + mfmsr   r3
> + rldicl. r0,r3,4,63
> + mtlr    r11
> + beqlr
> + li  r0,0
> + mtspr   SPRN_LPID,r0
> + mfspr   r3,SPRN_LPCR
> + ori r3, r3, LPCR_PECEDH
> + oris    r3,r3,(LPCR_UPRT >> 16)
> + bl  __init_LPCR
> + bl  __init_HFSCR
> + bl  __init_tlb_power7
> + mtlr    r11
> + blr
> +
>  __init_hvmode_206:
>   /* Disable CPU_FTR_HVMODE and exit if MSR:HV is not set */
>   mfmsr   r3
> diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
> index be4d73053bed..878bcc46ea04 100644
> --- a/arch/powerpc/kernel/cputable.c
> +++ b/arch/powerpc/kernel/cputable.c
> @@ -72,10 +72,13 @@ extern void __setup_cpu_power8(unsigned long offset, 
> struct cpu_spec* spec);
>  extern void __restore_cpu_power8(void);
>  extern void __setup_cpu_power9(unsigned long offset, struct cpu_spec* spec);
>  extern void __restore_cpu_power9(void);
> +extern void __setup_cpu_power9_uprt(unsigned long offset, struct cpu_spec* 
> spec);
> +extern void __restore_cpu_power9_uprt(void);
>  extern void __restore_cpu_a2(void);
>  extern void __flush_tlb_power7(unsigned int action);
>  extern void __flush_tlb_power8(unsigned int action);
>  extern void __flush_tlb_power9(unsigned int action);
> +extern void __flush_tlb_power9_radix(unsigned int action);
>  extern long __machine_check_early_realmode_p7(struct pt_regs *regs);
>  extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
>  #endif /* CONFIG_PPC64 */
> @@ -508,9 +511,9 @@ static struct cpu_spec __initdata cpu_specs[] = {
>   .platform   = "power8",
>   },
>   

Re: [PATCH v2] powerpc: mm: fixup preempt undefflow with huge pages

2016-03-29 Thread Michael Ellerman
On Tue, 2016-03-29 at 15:40 +0200, Sebastian Andrzej Siewior wrote:

> On 2016-03-10 01:04:24 [+0530], Aneesh Kumar K.V wrote:

> > Sebastian Andrzej Siewior  writes:
> 
> *ping*
> http://patchwork.ozlabs.org/patch/593943/

*pong*

The merge window just closed, I'm still recovering.

I've got it in my fixes branch locally, I'll probably push that today to
linux-next.

cheers


Re: [v2] ppc64/book3s: fix branching to out of line handlers in relocation kernel

2016-03-29 Thread Michael Ellerman
On Tue, 2016-29-03 at 18:34:37 UTC, Hari Bathini wrote:
> diff --git a/arch/powerpc/kernel/exceptions-64s.S 
> b/arch/powerpc/kernel/exceptions-64s.S
> index 7716ceb..e598580 100644
> --- a/arch/powerpc/kernel/exceptions-64s.S
> +++ b/arch/powerpc/kernel/exceptions-64s.S
> @@ -764,8 +764,8 @@ kvmppc_skip_Hinterrupt:
>  #endif
>  
>  /*
> - * Code from here down to __end_handlers is invoked from the
> - * exception prologs above.  Because the prologs assemble the
> + * Code from here down to end of out of line handlers is invoked from
> + * the exception prologs above.  Because the prologs assemble the

I think it would be better to just replace __end_handlers with __end_interrupts,
that way it's entirely clear what location you're talking about.

> @@ -953,11 +953,6 @@ hv_facility_unavailable_relon_trampoline:
>  #endif
>   STD_RELON_EXCEPTION_PSERIES(0x5700, 0x1700, altivec_assist)
>  
> - /* Other future vectors */
> - .align  7
> - .globl  __end_interrupts
> -__end_interrupts:
> -
>   .align  7
>  system_call_entry:
>   b   system_call_common
> @@ -1230,10 +1225,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
>   STD_EXCEPTION_COMMON(0xf60, facility_unavailable, 
> facility_unavailable_exception)
>   STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, 
> facility_unavailable_exception)
>  
> - .align  7
> - .globl  __end_handlers
> -__end_handlers:
> -

Sorry I wasn't clear in my last mail, please do this as a separate cleanup patch
after this patch.

> @@ -1244,6 +1235,16 @@ __end_handlers:
>   STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)
>   STD_RELON_EXCEPTION_HV_OOL(0xf80, hv_facility_unavailable)
>  
> + /* FIXME: For now, let us move the __end_interrupts marker down past

Why is it FIXME?

In general I don't want to merge code that adds a FIXME unless there is some
very good reason.

AFAICS this is a permanent solution isn't it?

> +  * the out-of-line handlers, to make sure we also copy OOL handlers
> +  * to real address 0x100 when running a relocatable kernel. This helps

It doesn't "help" it's 100% required.

> +  * in cases where interrupt vectors are not long enough (like 0x4f00,
> +  * 0x4f20, etc.) to branch out to OOL handlers with LOAD_HANDLER().
> +  */
> + .align  7
> + .globl  __end_interrupts
> +__end_interrupts:
> +
>  #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
>  /*
>   * Data area reserved for FWNMI option.


cheers

Re: [v2,2/2] cxl: Configure the PSL for two CAPI ports on POWER8NVL

2016-03-29 Thread Michael Ellerman
On Wed, 2016-16-03 at 16:28:56 UTC, Philippe Bergheaud wrote:
> The POWER8NVL chip has two CAPI ports.  Configure the PSL to route
> data to the port corresponding to the CAPP unit.
> 
> diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
> index 0c6c17a1..924ba63 100644
> --- a/drivers/misc/cxl/pci.c
> +++ b/drivers/misc/cxl/pci.c
> @@ -340,12 +341,35 @@ static void dump_afu_descriptor(struct cxl_afu *afu)
>  #undef show_reg
>  }
>  
> +#define CAPP_UNIT0_ID 0xBA
> +#define CAPP_UNIT1_ID 0XBE
> +
> +static u64 capp_unit_id(struct device_node *np)

It's always nice if function names contain a verb, eg. get_capp_unit_id().

> +{
> + const __be32 *prop;
> + u64 phb_index;
> +
> + /* For chips other than POWER8NVL, we only have CAPP 0,
> +  * irrespective of which PHB is used */

Can you use the standard block commenting style please, eg:

/*
 * For chips ..
 */

> + if (!pvr_version_is(PVR_POWER8NVL))
> + return CAPP_UNIT0_ID ;
^
don't need a space here

> + /* For POWER8NVL, assume CAPP 0 is attached to PHB0 and
> +  * CAPP 1 is attached to PHB1 */
> + prop = of_get_property(np, "ibm,phb-index", NULL);

Please use of_property_read_u32().

> + if (!prop)
> + return 0;
> + phb_index = be32_to_cpup(prop);

Some blank lines here would help readability.

> + return phb_index ? CAPP_UNIT1_ID : CAPP_UNIT0_ID;

This logic doesn't quite match your comment above. This says that any non-zero
PHB index is CAPP1, whereas the comment says CAPP1 == PHB1.

> +}
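
Putting these comments together, a revised helper might look roughly like
the following. This is an untested sketch of the suggestions above, not the
actual respin:

	#define CAPP_UNIT0_ID 0xBA
	#define CAPP_UNIT1_ID 0xBE

	static u64 get_capp_unit_id(struct device_node *np)
	{
		u32 phb_index;

		/*
		 * For chips other than POWER8NVL we only have CAPP 0,
		 * irrespective of which PHB is used.
		 */
		if (!pvr_version_is(PVR_POWER8NVL))
			return CAPP_UNIT0_ID;

		/*
		 * For POWER8NVL, CAPP 0 is attached to PHB0 and CAPP 1
		 * to PHB1; anything else is unexpected.
		 */
		if (of_property_read_u32(np, "ibm,phb-index", &phb_index))
			return 0;

		if (phb_index == 0)
			return CAPP_UNIT0_ID;
		if (phb_index == 1)
			return CAPP_UNIT1_ID;

		return 0;	/* caller treats 0 as an error */
	}
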
> +
>  static int init_implementation_adapter_regs(struct cxl *adapter, struct 
> pci_dev *dev)
>  {
>   struct device_node *np;
>   const __be32 *prop;
>   u64 psl_dsnctl;
>   u64 chipid;
> + u64 cappunitid;

capp_unit_id ?

>   if (!(np = pnv_pci_get_phb_node(dev)))
>   return -ENODEV;
> @@ -355,10 +379,15 @@ static int init_implementation_adapter_regs(struct cxl 
> *adapter, struct pci_dev
>   if (!np)
>   return -ENODEV;
>   chipid = be32_to_cpup(prop);
> + cappunitid = capp_unit_id(np);
>   of_node_put(np);
> + if (!cappunitid)
> + return -ENODEV;

You probably want a pr_xxx() there, don't you?

>   /* Tell PSL where to route data to */
> - psl_dsnctl = 0x02E89200ULL | (chipid << (63-5));
> + psl_dsnctl = 0x9200ULL | (chipid << (63-5));
> + psl_dsnctl |= (cappunitid << (63-13));
> +
>   cxl_p1_write(adapter, CXL_PSL_DSNDCTL, psl_dsnctl);
>   cxl_p1_write(adapter, CXL_PSL_RESLCKTO, 0x2000200ULL);
>   /* snoop write mask */


cheers

Re: [PATCH] Remove kretprobe_trampoline_holder.

2016-03-29 Thread Michael Ellerman
On Tue, 2016-03-29 at 15:34 -0300, Thiago Jung Bauermann wrote:
> Am Dienstag, 29 März 2016, 10:45:57 schrieb Michael Ellerman:
> > On Mon, 2016-03-28 at 17:29 -0300, Thiago Jung Bauermann wrote:
> > > If I do s/_do_fork/._do_fork/ in kprobe_ftrace.tc then all ftrace kprobe
> > > tests pass:
> > 
> > OK. We fixed that in 'perf probe', but not if you're using the sysfs file
> > directly.
> > 
> > Do you want to write a patch for ftracetest to try and handle it? I guess
> > you'd try "_do_fork" and if that fails then try "._do_fork", and maybe
> > only if uname -m says you're running on ppc64?
> 
> I did write a patch yesterday (included below for reference), but then I
> noticed that the other ftrace tests use _do_fork and they work fine (I guess
> because of the fix you mentioned). I think that ideally the ftrace filter
> mechanism should work with dot symbols as well as regular symbols.
> 
> I think this could work by creating a mechanism analogous to the
> ARCH_HAS_SYSCALL_MATCH_SYM_NAME one in trace_syscalls.c. Ftrace_match_record
> could call it to adjust the symbol name (like kprobe_lookup_name) before
> calling ftrace_match.
> 
> But I’m wondering if it’s really worth the effort and maybe patching the
> testcase is enough? Also, I don’t know whether my idea would have any
> side effects.

It'd be nice if it worked properly. Reusing kprobe_lookup_name() looks like it
would be the right fix, given this is "kprobe events".

cheers


Re: [PATCH] Remove kretprobe_trampoline_holder.

2016-03-29 Thread Thiago Jung Bauermann
Am Dienstag, 29 März 2016, 14:31:34 schrieb Michael Ellerman:
> On Mon, 2016-03-28 at 17:06 -0300, Thiago Jung Bauermann wrote:
> > With this patch, all vmlinux symbols match /proc/kallsyms and the
> > testcase passes.
> 
> Have you tested this on an LE system?

No, I was focusing on ppc64 BE.

> I did a quick test and it appears to
> be completely broken. Basically every symbol is not found, eg:
> 
>  1: vmlinux symtab matches kallsyms  :
> --- start ---
> test child forked, pid 16222
> Using /proc/kcore for kernel object code
> Looking at the vmlinux_path (8 entries long)
> Using /boot/vmlinux-4.5.0-11318-gdf01bc5cf4ea for symbols
> 0xc000b428: run_init_process not on kallsyms
> 0xc000b478: try_to_run_init_process not on kallsyms
> 0xc000b4f8: do_one_initcall not on kallsyms
> 0xc000b768: match_dev_by_uuid not on kallsyms
> ...
> 0xc09846b8: write_mem not on kallsyms
> 0xc0984728: do_fp_store not on kallsyms
> 0xc0984828: emulate_step not on kallsyms
> 
> $ sudo grep emulate_step /proc/kallsyms
> c0984820 T emulate_step
> 
> 
> Notice it's off by 8. That's because of the local/global entry point on
> LE. So that will need to be fixed somewhere.

Symbols loaded from the vmlinux file get adjusted to the local entry point 
because symbol-elf.c:dso__load_sym calls arch__elf_sym_adjust for each 
function symbol, which on ppc64le adjusts the address to the local entry 
point. On the other hand, when symbols are loaded from /proc/kallsyms they 
are used as-is (i.e., with the global entry point address).
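
(For reference, that ppc64le adjustment amounts to something like the
following; quoted from memory, so treat it as a sketch rather than the exact
perf source:

	void arch__elf_sym_adjust(GElf_Sym *sym)
	{
		/* skip the global-entry prologue encoded in st_other */
		sym->st_value += PPC64_LOCAL_ENTRY_OFFSET(sym->st_other);
	}

which is where the 8-byte offset above comes from.)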

This seems inconsistent: dso__load_kernel_sym can end up loading symbols 
from either vmlinux or /proc/kallsyms, so it seems symbols should look the 
same wherever they are loaded from. I am still trying to understand what the 
rest of the perf code expects.

If I comment out the call to arch__elf_sym_adjust in dso__load_sym, then all 
symbols are found in the vmlinux-kallsyms.c test (the test still fails 
because two symbols have different names. I haven’t checked why). Also, the 
code-reading.c test is able to read object code for a lot (but not all) of 
the sample events, whereas in the adjusted symbols case it can’t read object 
code for any sample event. The other perf tests are unaffected by the 
change.

I thought that if there’s no vmlinux available, then perf would have to rely 
only on /proc/kallsyms and I would see some difference in the test results 
compared to when perf uses vmlinux and adjusts the symbols to the local 
entry point, but I saw no difference at all.

So at first glance, it looks like perf is better off using symbols that 
point to the global entry point...
-- 
[]'s
Thiago Jung Bauermann
IBM Linux Technology Center


Re: [PATCH] cxl: fix setting of _PAGE_USER bit when handling page faults

2016-03-29 Thread Matthew R. Ochs
> On Mar 17, 2016, at 11:01 PM, Andrew Donnellan  
> wrote:
> 
> When handling page faults, cxl_handle_page_fault() checks whether the page
> should be accessible by userspace and have its _PAGE_USER access bit set.
> _PAGE_USER should be set if the context's kernel flag isn't set, or if the
> page falls outside of kernel memory.
> 
> However, the check currently uses the wrong operator, causing it to always
> evaluate to true. As such, we always set the _PAGE_USER bit, even when it
> should be restricted to the kernel.
> 
> Fix the check so that the _PAGE_USER bit is set only as intended.
> 
> Fixes: f204e0b8cedd ("cxl: Driver code for powernv PCIe based cards for
> userspace access")
> Signed-off-by: Andrew Donnellan 

Per Ian's suggestion, I went ahead and tried this with cxlflash.

Tested-by: Matthew R. Ochs 


Re: [PATCH V2 2/2] pseries/eeh: Handle RTAS delay requests in configure_bridge

2016-03-29 Thread Russell Currey
On Tue, 2016-03-29 at 20:49 +1100, Gavin Shan wrote:
> On Tue, Mar 29, 2016 at 12:51:51PM +1000, Russell Currey wrote:
> > 
> > In the configure_pe and configure_bridge RTAS calls, the spec states
> > that values of 9900-9905 can be returned, indicating that software
> > should delay for 10^x (where x is the last digit, i.e. 990x)
> > milliseconds and attempt the call again. Currently, the kernel doesn't
> > know about this, and respecting it fixes some PCI failures when the
> > hypervisor is busy.
> > 
> > The delay is capped at 0.2 seconds.
> > 
> When talking about RTAS calls, it might be better to use their full
> names as defined in the PAPR spec. So I guess it'd be better to replace
> configure_pe and configure_bridge with their corresponding full RTAS
> call names: "ibm,configure-pe" and "ibm,configure-bridge".
Makes sense, will do.
> 
> > 
> > Cc:  # 3.10-
> I think it would be #3.10+.
> 

> > +   /*
> > +    * RTAS can return a delay value of up to 10^5
> > milliseconds
> > +    * (RTAS_EXTENDED_DELAY_MAX), which is too long.  Only
> > respect
> > +    * the delay if it's 100ms or less.
> > +    */
> > 
> The PAPR spec states that the delay can be up to 10^5 ms. The spec also
> suggests the maximal delay is 10^2 ms. I guess it's worth mentioning that
> in the above comments if you like.

Yeah, I'll expand on that in the comment.
> 
> > 
> > +   switch (ret) {
> > +   case 0:
> > +   return ret;
> > +   case RTAS_EXTENDED_DELAY_MIN:
> > +   case RTAS_EXTENDED_DELAY_MIN+1:
> > +   case RTAS_EXTENDED_DELAY_MIN+2:
> > +   mwait = rtas_busy_delay(ret);
> > +   break;
> > +   default:
> > +   goto err;
> > +   }
> > +
> > +   max_wait -= mwait;
> If you like, the block can be simplified as below. In that case, the
> #err tag isn't needed.
> 
> if (!ret)
> return ret;
> 
> max_wait -= rtas_busy_delay(ret);
> 
That doesn't catch the case where the return value is greater than
RTAS_EXTENDED_DELAY_MIN+2, in which case we would be sleeping for at least
1 second.  However, I am going to change it so that any delay above 100ms
is just treated as if it was 100ms, so I can simplify the code there and
probably remove the switch and goto.


Re: [PATCH V2 2/2] pseries/eeh: Handle RTAS delay requests in configure_bridge

2016-03-29 Thread Russell Currey
On Tue, 2016-03-29 at 08:51 -0700, Tyrel Datwyler wrote:
> On 03/28/2016 07:51 PM, Russell Currey wrote:
> > +   /*
> > +    * RTAS can return a delay value of up to 10^5
> > milliseconds
> > +    * (RTAS_EXTENDED_DELAY_MAX), which is too long.  Only
> > respect
> > +    * the delay if it's 100ms or less.
> > +    */
> Your changelog says the delay is capped at 0.2s. However, your comment
> in the code mentions the full 10^5ms based on the 9900-9905 codes. It
> would probably be best to expand your comment to mention that you are
> only handling 9900-9902, to eliminate the confusion between the comment
> above and the implementation below.
> 
> Further, despite PAPR's software note that the long busy should be
> limited to 9900-9902, you might want to add a catch to your switch to log
> any unexpected 9903-9905, or just treat them as a max 0.2s delay. Firmware
> has been known to do things on occasion that the spec says it shouldn't,
> and it might not be obvious at first, should you receive one of the
> longer delay codes, why we are going down the error path.
> 
> -Tyrel

Good to know, thanks.  I'll respin.

- Russell


Re: [PATCH 2/3] swiotlb: prefix dma_to_phys and phys_to_dma functions

2016-03-29 Thread Arnd Bergmann
On Monday 28 March 2016 14:29:29 Konrad Rzeszutek Wilk wrote:
> > diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
> > index ada00c3..8c0f66b 100644
> > --- a/arch/arm64/mm/dma-mapping.c
> > +++ b/arch/arm64/mm/dma-mapping.c
> > @@ -29,6 +29,14 @@
> >
> >  #include 
> >
> > +/*
> > + * If you are building a system without IOMMU, then you are using SWIOTLB
> > + * library. The ARM64 adaptation of this library does not support address
> > + * translation and it assumes that physical address = dma address for such
> > + * a use case. Please don't build a platform that violates this.
> > + */
> 
> Why not just expand the ARM64 part to support address translation?

Because so far all hardware we have is relatively sane. We only
need to implement this if someone accidentally puts their DMA
space at the wrong address.

There is at least one platform that could in theory use this because
their RAM starts at an address that is outside of the reach of 32-bit
devices, and a static IOMMU mapping (created by firmware) could be
used to map the start of RAM into DMA address zero, to avoid having
to use an IOMMU all the time, but it was considered not worth the
effort to implement that.

Arnd

[PATCH v2] ppc64/book3s: fix branching to out of line handlers in relocation kernel

2016-03-29 Thread Hari Bathini
Some of the interrupt vectors on 64-bit POWER server processors  are
only 32 bytes long (8 instructions), which is not enough for the full
first-level interrupt handler. For these we need to branch to an out-
of-line (OOL) handler. But when we are running a relocatable kernel,
interrupt vectors till __end_interrupts marker are copied down to real
address 0x100. So, branching to labels (read OOL handlers) outside this
section should be handled differently (see LOAD_HANDLER()), considering a
relocatable kernel, which would need at least 4 instructions.

However, branching from interrupt vector means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors as mentioned
in commit 1707dd16. So, EXCEPTION_PROLOG_0 (6 instructions) that contains
the part up to the point where the CFAR is saved in the PACA should be
part of the short interrupt vectors before we branch out to OOL handlers.

But as mentioned already, there are interrupt vectors on 64-bit POWER server
processors that are only 32 bytes long (like vectors 0x4f00, 0x4f20, etc.),
which cannot accommodate the above two cases at the same time owing to space
constraint. Currently, in these interrupt vectors, we simply branch out to
OOL handlers, without using LOAD_HANDLER(), which leaves us vulnerable when
running a relocatable kernel (eg. kdump case). While this has been the case
for some time now and kdump is used widely, we were fortunate not to see any
problems so far, for three reasons:

1. In almost all cases, production kernel (relocatable) is used for
   kdump as well, which would mean that crashed kernel's OOL handler
   would be at the same place where we end up branching to, from the short
   interrupt vector of the kdump kernel.
2. Also, OOL handler was unlikely the reason for crash in almost all
   the kdump scenarios, which meant we had a sane OOL handler from
   crashed kernel that we branched to.
3. On most 64-bit POWER server processors, page size is large enough
   that marking interrupt vector code as executable (see commit
   429d2e83) leads to marking OOL handler code from crashed kernel,
   that sits right below interrupt vector code from kdump kernel, as
   executable as well.

Let us fix this undependable code path by moving the __end_interrupts marker
down past OOL handlers to make sure that we also copy OOL handlers to real
address 0x100 when running a relocatable kernel. This helps in cases discussed
above, where interrupt vectors are not long enough to branch out to OOL handlers
with LOAD_HANDLER(). While we are here, let us remove the virtually 
insignificant
__end_handlers marker.

This fix has been tested successfully in a kdump scenario, on an LPAR with 4K
page size, using different default/production and kdump kernels.

Signed-off-by: Hari Bathini 
Signed-off-by: Mahesh Salgaonkar 
---

changes from v1:
1. Changed the subject from "copy interrupts till __end_handlers marker
   instead of __end_interrupts" to a more generic one
2. Used __end_interrupts marker instead of __end_handlers to make the fix
   less complicated.
3. Removed unused __end_handlers marker.


 arch/powerpc/kernel/exceptions-64s.S |   23 ---
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 7716ceb..e598580 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -764,8 +764,8 @@ kvmppc_skip_Hinterrupt:
 #endif
 
 /*
- * Code from here down to __end_handlers is invoked from the
- * exception prologs above.  Because the prologs assemble the
+ * Code from here down to end of out of line handlers is invoked from
+ * the exception prologs above.  Because the prologs assemble the
  * addresses of these handlers using the LOAD_HANDLER macro,
  * which uses an ori instruction, these handlers must be in
  * the first 64k of the kernel image.
@@ -953,11 +953,6 @@ hv_facility_unavailable_relon_trampoline:
 #endif
STD_RELON_EXCEPTION_PSERIES(0x5700, 0x1700, altivec_assist)
 
-   /* Other future vectors */
-   .align  7
-   .globl  __end_interrupts
-__end_interrupts:
-
.align  7
 system_call_entry:
b   system_call_common
@@ -1230,10 +1225,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX)
STD_EXCEPTION_COMMON(0xf60, facility_unavailable, 
facility_unavailable_exception)
STD_EXCEPTION_COMMON(0xf80, hv_facility_unavailable, 
facility_unavailable_exception)
 
-   .align  7
-   .globl  __end_handlers
-__end_handlers:
-
/* Equivalents to the above handlers for relocation-on interrupt 
vectors */
STD_RELON_EXCEPTION_HV_OOL(0xe40, emulation_assist)
MASKABLE_RELON_EXCEPTION_HV_OOL(0xe80, h_doorbell)
@@ -1244,6 +1235,16 @@ __end_handlers:
STD_RELON_EXCEPTION_PSERIES_OOL(0xf60, facility_unavailable)

Re: [PATCH] Remove kretprobe_trampoline_holder.

2016-03-29 Thread Thiago Jung Bauermann
Am Dienstag, 29 März 2016, 10:45:57 schrieb Michael Ellerman:
> On Mon, 2016-03-28 at 17:29 -0300, Thiago Jung Bauermann wrote:
> > If I do s/_do_fork/._do_fork/ in kprobe_ftrace.tc then all ftrace kprobe
> > tests pass:
> 
> OK. We fixed that in 'perf probe', but not if you're using the sysfs file
> directly.
> 
> Do you want to write a patch for ftracetest to try and handle it? I guess
> you'd try "_do_fork" and if that fails then try "._do_fork", and maybe
> only if uname -m says you're running on ppc64?

I did write a patch yesterday (included below for reference), but then I
noticed that the other ftrace tests use _do_fork and they work fine (I guess
because of the fix you mentioned). I think that ideally the ftrace filter
mechanism should work with dot symbols as well as regular symbols.

I think this could work by creating a mechanism analogous to the
ARCH_HAS_SYSCALL_MATCH_SYM_NAME one in trace_syscalls.c. Ftrace_match_record
could call it to adjust the symbol name (like kprobe_lookup_name) before
calling ftrace_match.
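
Something like this, perhaps; purely a sketch of the idea, not existing
kernel code, and the hook name and plumbing are made up:

	/* ppc64 ABIv1: strip the leading dot before ftrace name matching */
	static inline const char *arch_ftrace_adjust_name(const char *name)
	{
		return (name[0] == '.') ? name + 1 : name;
	}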

But I’m wondering if it’s really worth the effort and maybe patching the
testcase is enough? Also, I don’t know whether my idea would have any
side effects.

-- 
[]'s
Thiago Jung Bauermann
IBM Linux Technology Center


From 2ea9b3545906932b9aa213506ec0dff03cffdbe5 Mon Sep 17 00:00:00 2001
From: Thiago Jung Bauermann 
Date: Mon, 28 Mar 2016 18:39:56 -0300
Subject: [PATCH] selftests: kprobe: Fix kprobe_ftrace.tc on ppc64

On ppc64 (but not ppc64le), symbols for function entry points start with
a dot. There's no _do_fork but there is a ._do_fork.

Signed-off-by: Thiago Jung Bauermann 
---
 .../selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc  | 20 +---
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc 
b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
index d6f2f49..c3ec5c2 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
@@ -4,33 +4,39 @@
 [ -f kprobe_events ] || exit_unsupported # this is configurable
 grep function available_tracers || exit_unsupported # this is configurable
 
+if [ "$(uname -m)" = "ppc64" ]; then
+FILTER_FUNCTION="._do_fork"
+else
+FILTER_FUNCTION="_do_fork"
+fi
+
 # prepare
 echo nop > current_tracer
-echo _do_fork > set_ftrace_filter
+echo $FILTER_FUNCTION > set_ftrace_filter
 echo 0 > events/enable
 echo > kprobe_events
-echo 'p:testprobe _do_fork' > kprobe_events
+echo "p:testprobe $FILTER_FUNCTION" > kprobe_events
 
 # kprobe on / ftrace off
 echo 1 > events/kprobes/testprobe/enable
 echo > trace
 ( echo "forked")
 grep testprobe trace
-! grep '_do_fork <-' trace
+! grep "$FILTER_FUNCTION <-" trace
 
 # kprobe on / ftrace on
 echo function > current_tracer
 echo > trace
 ( echo "forked")
 grep testprobe trace
-grep '_do_fork <-' trace
+grep "$FILTER_FUNCTION <-" trace
 
 # kprobe off / ftrace on
 echo 0 > events/kprobes/testprobe/enable
 echo > trace
 ( echo "forked")
 ! grep testprobe trace
-grep '_do_fork <-' trace
+grep "$FILTER_FUNCTION <-" trace
 
 # kprobe on / ftrace on
 echo 1 > events/kprobes/testprobe/enable
@@ -38,14 +44,14 @@ echo function > current_tracer
 echo > trace
 ( echo "forked")
 grep testprobe trace
-grep '_do_fork <-' trace
+grep "$FILTER_FUNCTION <-" trace
 
 # kprobe on / ftrace off
 echo nop > current_tracer
 echo > trace
 ( echo "forked")
 grep testprobe trace
-! grep '_do_fork <-' trace
+! grep "$FILTER_FUNCTION <-" trace
 
 # cleanup
 echo nop > current_tracer
-- 
1.9.1



Re: ppc64/book3s: copy interrupts till __end_handlers marker instead of __end_interrupts

2016-03-29 Thread Hari Bathini



On 03/29/2016 03:47 PM, Michael Ellerman wrote:

Hi Hari,

You win the "Best Change Log of the Year" award.

Some comments below ...

On Mon, 2016-28-03 at 11:23:22 UTC, Hari Bathini wrote:

Some of the interrupt vectors on 64-bit POWER server processors  are
only 32 bytes long (8 instructions), which is not enough for the full
first-level interrupt handler. For these we need to branch to an out-
of-line (OOL) handler. But when we are running a relocatable kernel,
interrupt vectors till __end_interrupts marker are copied down to real
address 0x100. So, branching to labels (read OOL handlers) outside this
section should be handled differently (see LOAD_HANDLER()), considering a
relocatable kernel, which would need at least 4 instructions.

However, branching from interrupt vector means that we corrupt the CFAR
(come-from address register) on POWER7 and later processors as mentioned
in commit 1707dd16. So, EXCEPTION_PROLOG_0
(6 instructions) that contains the part up to the point where the CFAR is
saved in the PACA should be part of the short interrupt vectors before we
branch out to OOL handlers.

But as mentioned already, there are interrupt vectors on 64-bit POWER server
processors that are only 32 bytes long (like vectors 0x4f00, 0x4f20, etc.),
which cannot accommodate the above two cases at the same time owing to space
constraint. Currently, in these interrupt vectors, we simply branch out to
OOL handlers, without using LOAD_HANDLER(), which leaves us vulnerable when
running a relocatable kernel (eg. kdump case). While this has been the case
for some time now and kdump is used widely, we were fortunate not to see any
problems so far, for three reasons:

 1. In almost all cases, production kernel (relocatable) is used for
kdump as well, which would mean that crashed kernel's OOL handler
would be at the same place where we end up branching to, from the short
interrupt vector of the kdump kernel.
 2. Also, OOL handler was unlikely the reason for crash in almost all
the kdump scenarios, which meant we had a sane OOL handler from
crashed kernel that we branched to.
 3. On most 64-bit POWER server processors, page size is large enough
that marking interrupt vector code as executable (see commit
429d2e83) leads to marking OOL handler code from crashed kernel,
that sits right below interrupt vector code from kdump kernel, as
executable as well.

Let us fix this undependable code path firstly, by moving the __end_handlers
marker down past the OOL handlers. Secondly, by copying interrupt vectors down to
the __end_handlers marker instead of __end_interrupts, when running a relocatable
kernel, to make sure we end up in the relocated (kdump) kernel's OOL handler instead
of crashed kernel's. Thirdly, by marking all the interrupt vector code that is
copied down to real address 0x100 as executable, considering the relocation on
exception feature that allows exceptions to be raised in virtual mode (IR=DR=1).

This fix has been tested successfully in a kdump scenario, on an LPAR with 4K
page size, using different default/production and kdump kernels.

So I think you've missed one important case.


My bad! I missed out on considering this case..


In do_final_fixups() we recopy the (now patched) kernel code down to zero. That
code uses __end_interrupts as its limit, so I think if you look closely your OOL
handlers down at zero will not have had feature fixups applied to them.

I think perhaps the better fix is just to move __end_interrupts down (up) to the
right location. AFAICS all users of __end_interrupts actually want that address.

It would also mean we could remove __end_handlers as unused.


True. This sounds less complicated.


So can you please check that I'm right about do_final_fixups(), and then try
moving __end_interrupts and check that works?


Yeah. Testing the patch. Will post it soon.
Thanks for the review!

- Hari


Re: [PATCH V2 2/2] pseries/eeh: Handle RTAS delay requests in configure_bridge

2016-03-29 Thread Tyrel Datwyler
On 03/28/2016 07:51 PM, Russell Currey wrote:
> In the configure_pe and configure_bridge RTAS calls, the spec states
> that values of 9900-9905 can be returned, indicating that software
> should delay for 10^x (where x is the last digit, i.e. 990x)
> milliseconds and attempt the call again. Currently, the kernel doesn't
> know about this, and respecting it fixes some PCI failures when the
> hypervisor is busy.
> 
> The delay is capped at 0.2 seconds.
> 
> Cc:  # 3.10-
> Signed-off-by: Russell Currey 
> ---
> V2: Use rtas_busy_delay and the new ibm_configure_pe token, refactoring
> ---
>  arch/powerpc/platforms/pseries/eeh_pseries.c | 35 
> +++-
>  1 file changed, 29 insertions(+), 6 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c 
> b/arch/powerpc/platforms/pseries/eeh_pseries.c
> index 231b1df..c442b11 100644
> --- a/arch/powerpc/platforms/pseries/eeh_pseries.c
> +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
> @@ -619,20 +619,43 @@ static int pseries_eeh_configure_bridge(struct eeh_pe 
> *pe)
>  {
>   int config_addr;
>   int ret;
> + /* Waiting 0.2s maximum before skipping configuration */
> + int max_wait = 200;
> + int mwait;
>  
>   /* Figure out the PE address */
>   config_addr = pe->config_addr;
>   if (pe->addr)
>   config_addr = pe->addr;
>  
> - ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
> - config_addr, BUID_HI(pe->phb->buid),
> - BUID_LO(pe->phb->buid));
> + while (max_wait > 0) {
> + ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
> + config_addr, BUID_HI(pe->phb->buid),
> + BUID_LO(pe->phb->buid));
>  
> - if (ret)
> - pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n",
> - __func__, pe->phb->global_number, pe->addr, ret);
> + /*
> +  * RTAS can return a delay value of up to 10^5 milliseconds
> +  * (RTAS_EXTENDED_DELAY_MAX), which is too long.  Only respect
> +  * the delay if it's 100ms or less.
> +  */

Your changelog says the delay is capped at 0.2s. However, your comment
in the code mentions the full 10^5ms based on the 9900-9905 codes. It
would probably be best to expand your comment to mention that you are
only handling 9900-9902, to eliminate the confusion between the comment
above and the implementation below.

Further, despite PAPR's software note that the long busy should be
limited to 9900-9902, you might want to add a catch to your switch to log
any unexpected 9903-9905, or just treat them as a max 0.2s delay. Firmware
has been known to do things on occasion that the spec says it shouldn't,
and it might not be obvious at first, should you receive one of the
longer delay codes, why we are going down the error path.

-Tyrel

>  
> + switch (ret) {
> + case 0:
> + return ret;
> + case RTAS_EXTENDED_DELAY_MIN:
> + case RTAS_EXTENDED_DELAY_MIN+1:
> + case RTAS_EXTENDED_DELAY_MIN+2:
> + mwait = rtas_busy_delay(ret);
> + break;
> + default:
> + goto err;
> + }
> +
> + max_wait -= mwait;
> + }
> +err:
> + pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n",
> + __func__, pe->phb->global_number, pe->addr, ret);
>   return ret;
>  }
>  
> 


Re: [PATCH v2] powerpc: mm: fixup preempt undefflow with huge pages

2016-03-29 Thread Sebastian Andrzej Siewior
On 2016-03-10 01:04:24 [+0530], Aneesh Kumar K.V wrote:
> Sebastian Andrzej Siewior  writes:

*ping*
http://patchwork.ozlabs.org/patch/593943/

> > [ text/plain ]
> > hugepd_free() used __get_cpu_var() once. Nothing ensured that the code
> > accessing the variable did not migrate from one CPU to another and soon
> > this was noticed by Tiejun Chen in 94b09d755462 ("powerpc/hugetlb:
> > Replace __get_cpu_var with get_cpu_var"). So we had it fixed.
> >
> > Christoph Lameter was doing his __get_cpu_var() replaces and forgot
> > PowerPC. Then he noticed this and sent his fixed up batch again which
> > got applied as 69111bac42f5 ("powerpc: Replace __get_cpu_var uses").
> >
> > The careful reader will noticed one little detail: get_cpu_var() got
> > replaced with this_cpu_ptr(). So now we have a put_cpu_var() which does
> > a preempt_enable() and nothing that does preempt_disable() so we
> > underflow the preempt counter.
> >
> 
> Reviewed-by: Aneesh Kumar K.V 
> 
> > Cc: Benjamin Herrenschmidt 
> > Cc: Christoph Lameter 
> > Cc: Michael Ellerman 
> > Cc: 
> > Signed-off-by: Sebastian Andrzej Siewior 
> > ---
> > v1…v2: - use get_cpu_var() instead of get_cpu_ptr()
> >- correct indentation of put_cpu_var()
> >
> >  arch/powerpc/mm/hugetlbpage.c | 4 ++--
> >  1 file changed, 2 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
> > index 744e24bcb85c..4a811ca7ac9d 100644
> > --- a/arch/powerpc/mm/hugetlbpage.c
> > +++ b/arch/powerpc/mm/hugetlbpage.c
> > @@ -414,13 +414,13 @@ static void hugepd_free(struct mmu_gather *tlb, void 
> > *hugepte)
> >  {
> > struct hugepd_freelist **batchp;
> >  
> > -	batchp = this_cpu_ptr(&hugepd_freelist_cur);
> > +	batchp = &get_cpu_var(hugepd_freelist_cur);
> >  
> >  	if (atomic_read(&tlb->mm->mm_users) < 2 ||
> >  	    cpumask_equal(mm_cpumask(tlb->mm),
> >  			  cpumask_of(smp_processor_id()))) {
> >  		kmem_cache_free(hugepte_cache, hugepte);
> > -                put_cpu_var(hugepd_freelist_cur);
> > +		put_cpu_var(hugepd_freelist_cur);
> > return;
> > }
> >  

Sebastian

Re: [PATCH 2/3] swiotlb: prefix dma_to_phys and phys_to_dma functions

2016-03-29 Thread Stefano Stabellini
On Mon, 28 Mar 2016, Konrad Rzeszutek Wilk wrote:
> On Fri, Mar 18, 2016 at 11:00 AM, Sinan Kaya  wrote:
> > On 3/18/2016 8:12 AM, Robin Murphy wrote:
> >> Since we know for sure that swiotlb_to_phys is a no-op on arm64, it might 
> >> be cleaner to simply not reference it at all. I suppose we could have some 
> >> private local wrappers, e.g.:
> >>
> >> #define swiotlb_to_virt(addr) phys_to_virt((phys_addr_t)(addr))
> >>
> >> to keep the intent of the code clear (and just in case anyone ever builds 
> >> a system mad enough to warrant switching out that definition, but I'd hope 
> >> that never happens).
> >>
> >> Otherwise, looks good - thanks for doing this!
> >
> > OK. I added this. Reviewed-by?
> >
> > I'm not happy to submit such a big patch for all different ARCHs. I couldn't
> > find a cleaner solution. I'm willing to split this patch into multiple if 
> > there
> > is a better way.
> >
> > diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c
> > index ada00c3..8c0f66b 100644
> > --- a/arch/arm64/mm/dma-mapping.c
> > +++ b/arch/arm64/mm/dma-mapping.c
> > @@ -29,6 +29,14 @@
> >
> >  #include 
> >
> > +/*
> > + * If you are building a system without IOMMU, then you are using SWIOTLB
> > + * library. The ARM64 adaptation of this library does not support address
> > + * translation and it assumes that physical address = dma address for such
> > + * a use case. Please don't build a platform that violates this.
> > + */
> 
> Why not just expand the ARM64 part to support address translation?
>
> > +#define swiotlb_to_virt(addr) phys_to_virt((phys_addr_t)(addr))
> > +
> 
> Adding Stefano here.

Could you please explain what is the problem that you are trying to
solve? In other words, what is the issue with assuming that physical
address = dma address (and the current dma_to_phys and phys_to_dma
static inlines) if no arm64 platforms violate it? That's pretty much
what is done on x86 too (without X86_DMA_REMAP).

If you want to make sure that the assumption is not violated, you can
introduce a boot time check or a BUG_ON somewhere.

If there is an arm64 platform with phys_addr != dma_addr, we need proper
support for it. In fact even if there is an IOMMU on that platform, when
running Xen on it, the IOMMU would be used by the hypervisor and Linux
would still end up without it, using the swiotlb.

 
> >  static pgprot_t __get_dma_pgprot(struct dma_attrs *attrs, pgprot_t prot,
> >  bool coherent)
> >  {
> > @@ -188,7 +196,7 @@ static void __dma_free(struct device *dev, size_t size,
> >void *vaddr, dma_addr_t dma_handle,
> >struct dma_attrs *attrs)
> >  {
> > -   void *swiotlb_addr = phys_to_virt(swiotlb_dma_to_phys(dev, 
> > dma_handle));
> > +   void *swiotlb_addr = swiotlb_to_virt(dma_handle);
> >
> > size = PAGE_ALIGN(size);
> >
> > @@ -209,8 +217,7 @@ static dma_addr_t __swiotlb_map_page(struct device 
> > *dev, struct page *page,
> >
> > dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
> > if (!is_device_dma_coherent(dev))
> > -   __dma_map_area(phys_to_virt(swiotlb_dma_to_phys(dev, 
> > dev_addr)),
> > -  size, dir);
> > +   __dma_map_area(swiotlb_to_virt(dev_addr), size, dir);
> >
> > return dev_addr;
> >  }
> > @@ -283,8 +290,7 @@ static void __swiotlb_sync_single_for_device(struct 
> > device *dev,
> >  {
> > swiotlb_sync_single_for_device(dev, dev_addr, size, dir);
> > if (!is_device_dma_coherent(dev))
> > -   __dma_map_area(phys_to_virt(swiotlb_dma_to_phys(dev, dev_addr)),
> > -  size, dir);
> > +   __dma_map_area(swiotlb_to_virt(dev_addr), size, dir);
> >  }
> >
> >
> >
> >
> > --
> > Sinan Kaya
> > Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
> > Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
> > Foundation Collaborative Project
> 
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH 2/3] swiotlb: prefix dma_to_phys and phys_to_dma functions

2016-03-29 Thread Sinan Kaya
On 3/29/2016 8:44 AM, Stefano Stabellini wrote:
> Could you please explain what problem you are trying to solve? In other
> words, what is the issue with assuming that physical
> address = dma address (and the current dma_to_phys and phys_to_dma
> static inlines) if no arm64 platforms violate it? That's pretty much
> what is done on x86 too (without X86_DMA_REMAP).
> 
> If you want to make sure that the assumption is not violated, you can
> introduce a boot time check or a BUG_ON somewhere.
> 
> If there is an arm64 platform with phys_addr != dma_addr, we need proper
> support for it. In fact even if there is an IOMMU on that platform, when
> running Xen on it, the IOMMU would be used by the hypervisor and Linux
> would still end up without it, using the swiotlb.


The problem is that device drivers are trying to use the dma_to_phys and
phys_to_dma APIs to do address translation, even though these two APIs do not
exist in the DMA mapping layer (see patch #1; I made the same mistake in my
HIDMA code).

In particular, when a device is behind an IOMMU, DMA addresses are not equal
to physical addresses, and using dma_to_phys there crashes the system.

I'm trying to prefix the dma_to_phys and phys_to_dma APIs with swiotlb so that
we know what they are intended for, and so that use of these APIs in drivers
can be discouraged during future code reviews.
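
For what it's worth, the driver-facing way to get a bus address is the DMA
mapping API, not these helpers. A minimal sketch (the device pointer and
buffer names here are made up, not taken from the HIDMA code):

        dma_addr_t dma;

        /* Let the DMA layer (and any IOMMU behind it) do the translation. */
        dma = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
        if (dma_mapping_error(dev, dma))
                return -ENOMEM;

        /* ... hand "dma" to the device and run the transfer ... */

        dma_unmap_single(dev, dma, len, DMA_TO_DEVICE);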

Since I renamed the API, Robin Murphy suggested converting this

phys_to_virt(swiotlb_dma_to_phys(dev, dma_handle))

to this

#define swiotlb_to_virt(addr) phys_to_virt((phys_addr_t)(addr))

swiotlb_to_virt(dma_handle)

just to reduce code clutter, since we know swiotlb_dma_to_phys() already
returns the physical address on the ARM64 architecture.

I think we can do this exercise later; I'll undo this change for the moment
and focus on the API rename.

I don't want this general purpose dma_to_phys API returning a phys == dma
value. This is not correct.

-- 
Sinan Kaya
Qualcomm Technologies, Inc. on behalf of Qualcomm Innovation Center, Inc.
Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum, a Linux 
Foundation Collaborative Project
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: ppc64/book3s: copy interrupts till __end_handlers marker instead of __end_interrupts

2016-03-29 Thread Michael Ellerman
Hi Hari,

You win the "Best Change Log of the Year" award.

Some comments below ...

On Mon, 2016-28-03 at 11:23:22 UTC, Hari Bathini wrote:
> Some of the interrupt vectors on 64-bit POWER server processors  are
> only 32 bytes long (8 instructions), which is not enough for the full
> first-level interrupt handler. For these we need to branch to an out-
> of-line (OOL) handler. But when we are running a relocatable kernel,
> interrupt vectors till __end_interrupts marker are copied down to real
> address 0x100. So, branching to labels (read OOL handlers) outside this
> section should be handled differently (see LOAD_HANDLER()), considering
> relocatable kernel, which would need at least 4 instructions.
> 
> However, branching from interrupt vector means that we corrupt the CFAR
> (come-from address register) on POWER7 and later processors as mentioned
> in commit 1707dd16. So, EXCEPTION_PROLOG_0
> (6 instructions) that contains the part up to the point where the CFAR is
> saved in the PACA should be part of the short interrupt vectors before we
> branch out to OOL handlers.
> 
> But as mentioned already, there are interrupt vectors on 64-bit POWER server
> processors that are only 32 bytes long (like vectors 0x4f00, 0x4f20, etc.),
> which cannot accommodate the above two cases at the same time owing to space
> constraint. Currently, in these interrupt vectors, we simply branch out to
> OOL handlers, without using LOAD_HANDLER(), which leaves us vulnerable when
> running a relocatable kernel (eg. kdump case). While this has been the case
> for sometime now and kdump is used widely, we were fortunate not to see any
> problems so far, for three reasons:
> 
> 1. In almost all cases, the production kernel (relocatable) is used for
>    kdump as well, which would mean that the crashed kernel's OOL handler
>    would be at the same place that we end up branching to from the short
>    interrupt vector of the kdump kernel.
> 2. Also, the OOL handler was unlikely to be the reason for the crash in
>    almost all of the kdump scenarios, which meant we had a sane OOL handler
>    from the crashed kernel that we branched to.
> 3. On most 64-bit POWER server processors, the page size is large enough
>    that marking interrupt vector code as executable (see commit
>    429d2e83) leads to marking the OOL handler code from the crashed kernel,
>    which sits right below the interrupt vector code from the kdump kernel,
>    as executable as well.
> 
> Let us fix this unreliable code path: firstly, by moving the __end_handlers
> marker down past the OOL handlers; secondly, by copying interrupt vectors down
> to the __end_handlers marker instead of __end_interrupts when running a
> relocatable kernel, to make sure we end up in the relocated (kdump) kernel's
> OOL handler instead of the crashed kernel's; and thirdly, by marking all the
> interrupt vector code that is copied down to real address 0x100 as executable,
> considering the relocation-on-exception feature that allows exceptions to be
> raised in virtual mode (IR=DR=1).
> 
> This fix has been tested successfully in a kdump scenario, on an LPAR with 4K
> page size, using different default/production and kdump kernels.

So I think you've missed one important case.

In do_final_fixups() we recopy the (now patched) kernel code down to zero. That
code uses __end_interrupts as its limit, so I think if you look closely your OOL
handlers down at zero will not have had feature fixups applied to them.
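
From memory it boils down to roughly this (paraphrased, not the literal
source, so please double-check me):

        /* do_final_fixups() on a relocatable ppc64 kernel: copy the now-patched
         * text from where the kernel runs back down to real address 0. */
        src = (int *)(KERNELBASE + PHYSICAL_START);
        dest = (int *)KERNELBASE;
        length = (__end_interrupts - _stext) / sizeof(int);
        while (length--) {
                patch_instruction(dest, *src);
                src++;
                dest++;
        }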

I think perhaps the better fix is just to move __end_interrupts down (up) to the
right location. AFAICS all users of __end_interrupts actually want that address.

It would also mean we could remove __end_handlers as unused.

So can you please check that I'm right about do_final_fixups(), and then try
moving __end_interrupts and check that works?

cheers
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: mailman From rewriting [was perf jit: genelf makes assumptions about endian]

2016-03-29 Thread Stephen Rothwell
Hi Jeremy,

On Tue, 29 Mar 2016 16:56:58 +0800 Jeremy Kerr  wrote:
>
> From the mail that Anton has sent to linuxppc-dev:
> 
> > From: Anton Blanchard via Linuxppc-dev 
> > Reply-To: Anton Blanchard   
> 
> Do you know why mailman would be re-writing From: there? It's confusing
> patchwork, as multiple mails are now coming from that address.

Yep, Anton posts from samba.org.  They publish a DMARC policy that
breaks mailing lists.  The best thing we can do is to do the above
rewrite of the From header.

The alternative is that all our Yahoo, Gmail and Hotmail subscribers
(at least) would bounce the email.

So this rewriting will happen for any From: line whose address is in a domain
with such a DMARC policy.  We have had posts from Yahoo subscribers
bounced in the past.
-- 
Cheers,
Stephen Rothwell
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V2 2/2] pseries/eeh: Handle RTAS delay requests in configure_bridge

2016-03-29 Thread Gavin Shan
On Tue, Mar 29, 2016 at 12:51:51PM +1000, Russell Currey wrote:
>In the configure_pe and configure_bridge RTAS calls, the spec states
>that values of 9900-9905 can be returned, indicating that software
>should delay for 10^x (where x is the last digit, i.e. 990x)
>milliseconds and attempt the call again. Currently, the kernel doesn't
>know about this, and respecting it fixes some PCI failures when the
>hypervisor is busy.
>
>The delay is capped at 0.2 seconds.
>

When talking about RTAS calls, it might be better to use their full names as
defined in the PAPR spec. So I guess it'd be better to replace configure_pe
and configure_bridge with the corresponding full RTAS call names:
"ibm,configure-pe" and "ibm,configure-bridge".

>Cc:  # 3.10-

I think it would be #3.10+.

>Signed-off-by: Russell Currey 
>---
>V2: Use rtas_busy_delay and the new ibm_configure_pe token, refactoring
>---
> arch/powerpc/platforms/pseries/eeh_pseries.c | 35 +++-
> 1 file changed, 29 insertions(+), 6 deletions(-)
>
>diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
>index 231b1df..c442b11 100644
>--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
>+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
>@@ -619,20 +619,43 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
> {
>   int config_addr;
>   int ret;
>+  /* Waiting 0.2s maximum before skipping configuration */
>+  int max_wait = 200;
>+  int mwait;
> 
>   /* Figure out the PE address */
>   config_addr = pe->config_addr;
>   if (pe->addr)
>   config_addr = pe->addr;
> 
>-  ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
>-  config_addr, BUID_HI(pe->phb->buid),
>-  BUID_LO(pe->phb->buid));
>+  while (max_wait > 0) {
>+  ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
>+  config_addr, BUID_HI(pe->phb->buid),
>+  BUID_LO(pe->phb->buid));
> 
>-  if (ret)
>-  pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n",
>-  __func__, pe->phb->global_number, pe->addr, ret);
>+  /*
>+   * RTAS can return a delay value of up to 10^5 milliseconds
>+   * (RTAS_EXTENDED_DELAY_MAX), which is too long.  Only respect
>+   * the delay if it's 100ms or less.
>+   */
> 

The PAPR spec states that the delay can be up to 10^5 ms, and it also
suggests a maximum delay of 10^2 ms. I guess it's worth mentioning that in
the above comments, if you like.
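
For reference, the 990x statuses map to delays like this; the existing
rtas_busy_delay_time() does the equivalent computation, the function below is
only an illustration:

        /* Illustrative only: delay in milliseconds for a 990x status. */
        static unsigned int extended_delay_ms(int status)
        {
                int order = status - RTAS_EXTENDED_DELAY_MIN; /* 9900 -> 0 ... 9905 -> 5 */
                unsigned int ms = 1;

                while (order-- > 0)
                        ms *= 10;  /* 9900 -> 1ms, 9902 -> 100ms, 9905 -> 100000ms */

                return ms;
        }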

>+  switch (ret) {
>+  case 0:
>+  return ret;
>+  case RTAS_EXTENDED_DELAY_MIN:
>+  case RTAS_EXTENDED_DELAY_MIN+1:
>+  case RTAS_EXTENDED_DELAY_MIN+2:
>+  mwait = rtas_busy_delay(ret);
>+  break;
>+  default:
>+  goto err;
>+  }
>+
>+  max_wait -= mwait;

If you like, the block can be simplified as below. In that case, the #err
label isn't needed.

if (!ret)
        return ret;

max_wait -= rtas_busy_delay(ret);
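
Putting it together, the retry could end up looking roughly like this (just a
sketch; it relies on rtas_busy_delay() returning 0 for statuses that aren't
busy/extended-delay ones):

        while (max_wait > 0) {
                ret = rtas_call(ibm_configure_pe, 3, 1, NULL, config_addr,
                                BUID_HI(pe->phb->buid), BUID_LO(pe->phb->buid));
                if (!ret)
                        return ret;

                mwait = rtas_busy_delay(ret);
                if (!mwait)        /* hard error, not a 990x delay */
                        break;

                max_wait -= mwait;
        }

        pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n",
                __func__, pe->phb->global_number, pe->addr, ret);
        return ret;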

>+  }
>+err:
>+  pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n",
>+  __func__, pe->phb->global_number, pe->addr, ret);
>   return ret;
> }
> 

Thanks,
Gavin

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

Re: [PATCH V2 1/2] pseries/eeh: Refactor the configure bridge RTAS tokens

2016-03-29 Thread Gavin Shan
On Tue, Mar 29, 2016 at 03:53:19PM +1000, Russell Currey wrote:
>On Tue, 2016-03-29 at 16:26 +1100, Gavin Shan wrote:
>> On Tue, Mar 29, 2016 at 12:51:50PM +1000, Russell Currey wrote:
>
>> >/*
>> > * Necessary sanity check. We needn't check "get-config-addr-info"
>> > @@ -93,8 +98,7 @@ static int pseries_eeh_init(void)
>> >(ibm_read_slot_reset_state2 == RTAS_UNKNOWN_SERVICE &&
>> > ibm_read_slot_reset_state == RTAS_UNKNOWN_SERVICE) ||
>> >ibm_slot_error_detail == RTAS_UNKNOWN_SERVICE   ||
>> > -  (ibm_configure_pe == RTAS_UNKNOWN_SERVICE   &&
>> > -   ibm_configure_bridge == RTAS_UNKNOWN_SERVICE)) {
>> > +  ibm_configure_pe == RTAS_UNKNOWN_SERVICE) {
>> >pr_info("EEH functionality not supported\n");
>> >return -EINVAL;
>> >}
>> Since you're here, you can do a similar thing for @ibm_read_slot_reset_state2
>> and @ibm_read_slot_reset_state?
>
>Ah, didn't notice there was a similar thing going on there.  Will fix.

Ok.

>> 
>> > 
>> > @@ -621,18 +625,9 @@ static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
>> >if (pe->addr)
>> >config_addr = pe->addr;
>> > 
>> > -  /* Use new configure-pe function, if supported */
>> > -  if (ibm_configure_pe != RTAS_UNKNOWN_SERVICE) {
>> > -  ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
>> > -  config_addr, BUID_HI(pe->phb->buid),
>> > -  BUID_LO(pe->phb->buid));
>> > -  } else if (ibm_configure_bridge != RTAS_UNKNOWN_SERVICE) {
>> > -  ret = rtas_call(ibm_configure_bridge, 3, 1, NULL,
>> > -  config_addr, BUID_HI(pe->phb->buid),
>> > -  BUID_LO(pe->phb->buid));
>> > -  } else {
>> > -  return -EFAULT;
>> > -  }
>> > +  ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
>> > +  config_addr, BUID_HI(pe->phb->buid),
>> > +  BUID_LO(pe->phb->buid));
>> > 
>> Russell, it seems this won't work if "ibm,configure-pe" and
>> "ibm,configure-bridge" are both missing from "/rtas".
>
>If they're both missing, then the init should fail as ibm_configure_pe will
>be RTAS_UNKNOWN_SERVICE, so this code should never be called.
>

Yeah, I missed the point, thanks.

>>  Also, I don't think we need to backport it to 3.10+ as it's not fixing
>> any bugs, if I'm correct.
>
>This patch doesn't, but the second patch does.
>

Ok. In the commit log of this patch, you have the line below, which means it
is needed by stable kernels. I agree the next patch is the one needed by
stable kernels, so the two patches should be in reverse order if you agree.
In that case, the next one (destined for stable kernels) won't depend on the
current one, which isn't required by stable kernels.

Cc:  # 3.10-   <<< The format would be 3.10+

Thanks,
Gavin

>> > 
>> >if (ret)
>> >pr_warn("%s: Unable to configure bridge PHB#%d-PE#%x (%d)\n",
>

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

mailman From rewriting [was perf jit: genelf makes assumptions about endian]

2016-03-29 Thread Jeremy Kerr
Hi Stephen,

From the mail that Anton has sent to linuxppc-dev:

> From: Anton Blanchard via Linuxppc-dev 
> Reply-To: Anton Blanchard 

Do you know why mailman would be re-writing From: there? It's confusing
patchwork, as multiple mails are now coming from that address.

Anton: did you do anything special when sending that mail?

Cheers,


Jeremy
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] perf jit: genelf makes assumptions about endian

2016-03-29 Thread Anton Blanchard via Linuxppc-dev
Commit 9b07e27f88b9 ("perf inject: Add jitdump mmap injection support")
incorrectly assumed that PowerPC is big endian only.

Simplify things by consolidating the define of GEN_ELF_ENDIAN and checking
for __BYTE_ORDER == __BIG_ENDIAN.

The PowerPC checks were also incorrect; they do not match what gcc
emits. We should first look for __powerpc64__, then __powerpc__.

Fixes: 9b07e27f88b9 ("perf inject: Add jitdump mmap injection support")
Signed-off-by: Anton Blanchard 
---
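A quick way to double-check the changelog's point about which macros the
compiler defines is a tiny standalone test program (not part of the patch);
on a little-endian ppc64 toolchain you would expect only the first two
puts() calls to fire:

#include <stdio.h>

int main(void)
{
#ifdef __powerpc64__
        puts("__powerpc64__ is defined");
#endif
#ifdef __powerpc__
        puts("__powerpc__ is defined");
#endif
#ifdef __powerpcle__
        puts("__powerpcle__ is defined");
#endif
#ifdef __ppcle__
        puts("__ppcle__ is defined");
#endif
        return 0;
}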

diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
index cd67e64..2fbeb59 100644
--- a/tools/perf/util/genelf.h
+++ b/tools/perf/util/genelf.h
@@ -9,36 +9,32 @@ int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_ent
 
 #if   defined(__arm__)
 #define GEN_ELF_ARCH   EM_ARM
-#define GEN_ELF_ENDIAN ELFDATA2LSB
 #define GEN_ELF_CLASS  ELFCLASS32
 #elif defined(__aarch64__)
 #define GEN_ELF_ARCH   EM_AARCH64
-#define GEN_ELF_ENDIAN ELFDATA2LSB
 #define GEN_ELF_CLASS  ELFCLASS64
 #elif defined(__x86_64__)
 #define GEN_ELF_ARCH   EM_X86_64
-#define GEN_ELF_ENDIAN ELFDATA2LSB
 #define GEN_ELF_CLASS  ELFCLASS64
 #elif defined(__i386__)
 #define GEN_ELF_ARCH   EM_386
-#define GEN_ELF_ENDIAN ELFDATA2LSB
 #define GEN_ELF_CLASS  ELFCLASS32
-#elif defined(__ppcle__)
-#define GEN_ELF_ARCH   EM_PPC
-#define GEN_ELF_ENDIAN ELFDATA2LSB
-#define GEN_ELF_CLASS  ELFCLASS64
-#elif defined(__powerpc__)
-#define GEN_ELF_ARCH   EM_PPC64
-#define GEN_ELF_ENDIAN ELFDATA2MSB
-#define GEN_ELF_CLASS  ELFCLASS64
-#elif defined(__powerpcle__)
+#elif defined(__powerpc64__)
 #define GEN_ELF_ARCH   EM_PPC64
-#define GEN_ELF_ENDIAN ELFDATA2LSB
 #define GEN_ELF_CLASS  ELFCLASS64
+#elif defined(__powerpc__)
+#define GEN_ELF_ARCH   EM_PPC
+#define GEN_ELF_CLASS  ELFCLASS32
 #else
 #error "unsupported architecture"
 #endif
 
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define GEN_ELF_ENDIAN ELFDATA2MSB
+#else
+#define GEN_ELF_ENDIAN ELFDATA2LSB
+#endif
+
 #if GEN_ELF_CLASS == ELFCLASS64
#define elf_newehdr    elf64_newehdr
#define elf_getshdr    elf64_getshdr
___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] fsl/qe_ic: move qe_ic to drivers/irqchip

2016-03-29 Thread Zhao Qiang
qe_ic was put in drivers/soc/fsl/qe, but it should be in
drivers/irqchip.

Signed-off-by: Zhao Qiang 
---
 drivers/irqchip/Makefile| 1 +
 drivers/{soc/fsl/qe => irqchip}/qe_ic.c | 0
 drivers/{soc/fsl/qe => irqchip}/qe_ic.h | 0
 drivers/soc/fsl/qe/Makefile | 2 +-
 4 files changed, 2 insertions(+), 1 deletion(-)
 rename drivers/{soc/fsl/qe => irqchip}/qe_ic.c (100%)
 rename drivers/{soc/fsl/qe => irqchip}/qe_ic.h (100%)

diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 18caacb..2977a89 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -59,3 +59,4 @@ obj-$(CONFIG_ARCH_SA1100) += irq-sa11x0.o
 obj-$(CONFIG_INGENIC_IRQ)  += irq-ingenic.o
 obj-$(CONFIG_IMX_GPCV2)+= irq-imx-gpcv2.o
 obj-$(CONFIG_PIC32_EVIC)   += irq-pic32-evic.o
+obj-$(CONFIG_QUICC_ENGINE) += qe_ic.o
diff --git a/drivers/soc/fsl/qe/qe_ic.c b/drivers/irqchip/qe_ic.c
similarity index 100%
rename from drivers/soc/fsl/qe/qe_ic.c
rename to drivers/irqchip/qe_ic.c
diff --git a/drivers/soc/fsl/qe/qe_ic.h b/drivers/irqchip/qe_ic.h
similarity index 100%
rename from drivers/soc/fsl/qe/qe_ic.h
rename to drivers/irqchip/qe_ic.h
diff --git a/drivers/soc/fsl/qe/Makefile b/drivers/soc/fsl/qe/Makefile
index 2031d38..51e4726 100644
--- a/drivers/soc/fsl/qe/Makefile
+++ b/drivers/soc/fsl/qe/Makefile
@@ -1,7 +1,7 @@
 #
 # Makefile for the linux ppc-specific parts of QE
 #
-obj-$(CONFIG_QUICC_ENGINE) += qe.o qe_common.o qe_ic.o qe_io.o
+obj-$(CONFIG_QUICC_ENGINE) += qe.o qe_common.o qe_io.o
 obj-$(CONFIG_CPM)  += qe_common.o
 obj-$(CONFIG_UCC)  += ucc.o
 obj-$(CONFIG_UCC_SLOW) += ucc_slow.o
-- 
2.1.0.27.g96db324

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev