Re: [PATCH V3 09/10] powerpc/mm: Lower the max real address to 53 bits

2017-03-27 Thread Paul Mackerras
On Tue, Mar 21, 2017 at 10:59:59PM +0530, Aneesh Kumar K.V wrote:
> The max value supported by the hardware is a 51-bit address. The radix page
> table defines a slot of 57 bits for future expansion. We restrict the value
> supported in the Linux kernel to 53 bits, so that we can use bits 53 to 57
> for storing hash Linux page table bits. This is done in the next patch.
> 
> This will free up the software page table bits to be used for features
> that are needed for both hash and radix. The current hash Linux page table
> format doesn't have any free software bits. Moving the hash Linux page table
> specific bits to the top of the RPN field frees up the software bits for
> other purposes.
> 
> Signed-off-by: Aneesh Kumar K.V 

Reviewed-by: Paul Mackerras 


Re: [PATCH V3 10/10] powerpc/mm: Move hash specific pte bits to be top bits of RPN

2017-03-27 Thread Paul Mackerras
On Tue, Mar 21, 2017 at 11:00:00PM +0530, Aneesh Kumar K.V wrote:
> We don't support the full 57 bits of physical address and hence can overload
> the top bits of the RPN as hash-specific PTE bits.
> 
> Signed-off-by: Aneesh Kumar K.V 

Reviewed-by: Paul Mackerras 


Re: [PATCH 12/12] powerpc/kvm: Native usage of the XIVE interrupt controller

2017-03-27 Thread Paul Mackerras
On Mon, Mar 20, 2017 at 05:49:14PM +1100, Benjamin Herrenschmidt wrote:
> This patch makes KVM capable of using the XIVE interrupt controller
> to provide the standard PAPR "XICS" style hypercalls. It is necessary
> for proper operation when the host uses XIVE natively.
> 
> This has been lightly tested on an actual system, including PCI
> pass-through with a TG3 device.
> 
> Signed-off-by: Benjamin Herrenschmidt 

Looks good overall, some comments below...

> ---
>  arch/powerpc/include/asm/kvm_book3s_asm.h |2 +
>  arch/powerpc/include/asm/kvm_host.h   |   28 +-
>  arch/powerpc/include/asm/kvm_ppc.h|   38 +
>  arch/powerpc/include/asm/xive.h   |   11 +-
>  arch/powerpc/kernel/asm-offsets.c |   10 +
>  arch/powerpc/kvm/Makefile |4 +-
>  arch/powerpc/kvm/book3s.c |   73 +-
>  arch/powerpc/kvm/book3s_hv.c  |   52 +-
>  arch/powerpc/kvm/book3s_hv_builtin.c  |  108 ++
>  arch/powerpc/kvm/book3s_hv_rm_xics.c  |   10 +-
>  arch/powerpc/kvm/book3s_hv_rm_xive.c  |   47 +
>  arch/powerpc/kvm/book3s_hv_rmhandlers.S   |   60 +-
>  arch/powerpc/kvm/book3s_rtas.c|   21 +-
>  arch/powerpc/kvm/book3s_xics.c|   35 +-
>  arch/powerpc/kvm/book3s_xics.h|5 +
>  arch/powerpc/kvm/book3s_xive.c| 1898 +
>  arch/powerpc/kvm/book3s_xive.h|  251 
>  arch/powerpc/kvm/book3s_xive_template.c   |  490 
>  arch/powerpc/kvm/irq.h|1 +
>  arch/powerpc/kvm/powerpc.c|   17 +-
>  arch/powerpc/platforms/powernv/opal.c |1 +
>  arch/powerpc/sysdev/xive/common.c |  131 +-
>  arch/powerpc/sysdev/xive/native.c |   92 +-
>  include/linux/kvm_host.h  |1 -
>  virt/kvm/kvm_main.c   |4 -
>  25 files changed, 3305 insertions(+), 85 deletions(-)
>  create mode 100644 arch/powerpc/kvm/book3s_hv_rm_xive.c
>  create mode 100644 arch/powerpc/kvm/book3s_xive.c
>  create mode 100644 arch/powerpc/kvm/book3s_xive.h
>  create mode 100644 arch/powerpc/kvm/book3s_xive_template.c
> 
> diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h 
> b/arch/powerpc/include/asm/kvm_book3s_asm.h
> index 0593d94..e719002 100644
> --- a/arch/powerpc/include/asm/kvm_book3s_asm.h
> +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
> @@ -111,6 +111,8 @@ struct kvmppc_host_state {
>   struct kvm_vcpu *kvm_vcpu;
>   struct kvmppc_vcore *kvm_vcore;
>   void __iomem *xics_phys;
> + void __iomem *xive_tm_area_phys;
> + void __iomem *xive_tm_area_virt;

Does this cause the paca to become a cacheline larger?  (Not that
there is much alternative to having these fields.)
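
(Editor's note: a minimal sketch of how the size concern could be checked at
build time; the cacheline budget below is illustrative and this check is not
part of the patch.)

#include <linux/bug.h>

/* Illustrative only: fail the build if kvmppc_host_state grows past an
 * assumed budget of four 128-byte cachelines. */
static inline void kvmppc_check_hstate_size(void)
{
	BUILD_BUG_ON(sizeof(struct kvmppc_host_state) > 4 * 128);
}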

>   u32 saved_xirr;
>   u64 dabr;
>   u64 host_mmcr[7];   /* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER */
> diff --git a/arch/powerpc/include/asm/kvm_host.h 
> b/arch/powerpc/include/asm/kvm_host.h
> index 7bba8f4..fc491ac 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -205,6 +205,12 @@ struct kvmppc_spapr_tce_table {
>  /* XICS components, defined in book3s_xics.c */
>  struct kvmppc_xics;
>  struct kvmppc_icp;
> +extern struct kvm_device_ops kvm_xics_ops;
> +
> +/* XIVE components, defined in book3s_xive.c */
> +struct kvmppc_xive;
> +struct kvmppc_xive_vcpu;
> +extern struct kvm_device_ops kvm_xive_ops;
>  
>  struct kvmppc_passthru_irqmap;
>  
> @@ -293,6 +299,7 @@ struct kvm_arch {
>  #endif
>  #ifdef CONFIG_KVM_XICS
>   struct kvmppc_xics *xics;
> + struct kvmppc_xive *xive;
>   struct kvmppc_passthru_irqmap *pimap;
>  #endif
>   struct kvmppc_ops *kvm_ops;
> @@ -421,7 +428,7 @@ struct kvmppc_passthru_irqmap {
>  
>  #define KVMPPC_IRQ_DEFAULT   0
>  #define KVMPPC_IRQ_MPIC  1
> -#define KVMPPC_IRQ_XICS  2
> +#define KVMPPC_IRQ_XICS  2 /* Includes a XIVE option */
>  
>  #define MMIO_HPTE_CACHE_SIZE 4
>  
> @@ -443,6 +450,21 @@ struct mmio_hpte_cache {
>  
>  struct openpic;
>  
> +/* QW0 and QW1 of a context */
> +union xive_qw01 {
> + struct {
> + u8  nsr;
> + u8  cppr;
> + u8  ipb;
> + u8  lsmfb;
> + u8  ack;
> + u8  inc;
> + u8  age;
> + u8  pipr;
> + };
> + __be64 qw;
> +};

This is slightly confusing because a "QW" (quadword) would normally be
128 bits, but this union is 64 bits.
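
(Editor's note: a one-line build-time check that documents the actual size;
hypothetical, not from the thread.)

/* The union as posted is one 64-bit doubleword, not a 128-bit quadword: */
BUILD_BUG_ON(sizeof(union xive_qw01) != sizeof(__be64));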

> +
>  struct kvm_vcpu_arch {
>   ulong host_stack;
>   u32 host_pid;
> @@ -688,6 +710,10 @@ struct kvm_vcpu_arch {
>   struct openpic *mpic;   /* KVM_IRQ_MPIC */
>  #ifdef CONFIG_KVM_XICS
>   struct kvmppc_icp *icp; /* XICS presentation controller */
> + struct kvmppc_xive_vcpu *xive_vcpu; /* XIVE virtual CPU data */
> + __be32 xive_cam_word;/* Cooked W2 in proper endian with valid bit */
> + u32 xive_pushed; /* Is the VP push

[PATCH kernel] powerpc/iommu: Do not call PageTransHuge() on tail pages

2017-03-27 Thread Alexey Kardashevskiy
The CMA pages migration code does not support compound pages at
the moment so it performs a few tests before proceeding to the actual
page migration.

One of the tests - PageTransHuge() - has VM_BUG_ON_PAGE(PageTail()) as
it should only be called on head pages. Since we also test for
PageCompound(), which includes PageTail(), we can simply move PageCompound()
in front of PageTransHuge() and therefore avoid the possible VM_BUG_ON_PAGE.

Signed-off-by: Alexey Kardashevskiy 
---

Some actual POWER8 systems do crash on that BUG_ON.
---
 arch/powerpc/mm/mmu_context_iommu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/mm/mmu_context_iommu.c 
b/arch/powerpc/mm/mmu_context_iommu.c
index 497130c5c742..ba7fccf993b3 100644
--- a/arch/powerpc/mm/mmu_context_iommu.c
+++ b/arch/powerpc/mm/mmu_context_iommu.c
@@ -81,7 +81,7 @@ struct page *new_iommu_non_cma_page(struct page *page, 
unsigned long private,
gfp_t gfp_mask = GFP_USER;
struct page *new_page;
 
-   if (PageHuge(page) || PageTransHuge(page) || PageCompound(page))
+   if (PageCompound(page) || PageHuge(page) || PageTransHuge(page))
return NULL;
 
if (PageHighMem(page))
@@ -100,7 +100,7 @@ static int mm_iommu_move_page_from_cma(struct page *page)
LIST_HEAD(cma_migrate_pages);
 
/* Ignore huge pages for now */
-   if (PageHuge(page) || PageTransHuge(page) || PageCompound(page))
+   if (PageCompound(page) || PageHuge(page) || PageTransHuge(page))
return -EBUSY;
 
lru_add_drain();
-- 
2.11.0



[PATCH kernel] powerpc/powernv/iommu: Fix compile breakage in iommu_tce_xchg_rm

2017-03-27 Thread Alexey Kardashevskiy
Signed-off-by: Alexey Kardashevskiy 
---

Please squash it into:
[02/10] powerpc/powernv/iommu: Add real mode version of 
iommu_table_ops::exchange()
---
 arch/powerpc/kernel/iommu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
index 9bace5df05d5..685a4767b722 100644
--- a/arch/powerpc/kernel/iommu.c
+++ b/arch/powerpc/kernel/iommu.c
@@ -1004,6 +1004,7 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned 
long entry,
 }
 EXPORT_SYMBOL_GPL(iommu_tce_xchg);
 
+#ifdef CONFIG_PPC_BOOK3S_64
 long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
unsigned long *hpa, enum dma_data_direction *direction)
 {
@@ -1026,6 +1027,7 @@ long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned 
long entry,
return ret;
 }
 EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm);
+#endif
 
 int iommu_take_ownership(struct iommu_table *tbl)
 {
-- 
2.11.0



Re: [PATCH v5 08/13] powerpc/perf: PMU functions for Core IMC and hotplugging

2017-03-27 Thread Madhavan Srinivasan



On Thursday 23 March 2017 06:39 PM, Gautham R Shenoy wrote:

Hi Maddy, Hemant, Anju,

On Thu, Mar 16, 2017 at 01:05:02PM +0530, Madhavan Srinivasan wrote:

[..snip..]


+
+static void core_imc_change_cpu_context(int old_cpu, int new_cpu)
+{
+   if (!core_imc_pmu)
+   return;
+   perf_pmu_migrate_context(&core_imc_pmu->pmu, old_cpu, new_cpu);
+}
+
+
+static int ppc_core_imc_cpu_online(unsigned int cpu)
+{
+   int ret;
+
+   /* If a cpu for this core is already set, then, don't do anything */
+   ret = cpumask_any_and(&core_imc_cpumask,
+cpu_sibling_mask(cpu));
+   if (ret < nr_cpu_ids)
+   return 0;
+
+   /* Else, set the cpu in the mask, and change the context */
+   cpumask_set_cpu(cpu, &core_imc_cpumask);
+   core_imc_change_cpu_context(-1, cpu);

So, in the core case, we are ok as long as any cpu in the core is
present in the imc_cpumask. It need not be the smallest online
cpu in the core.

Can the same logic be applied to the earlier nest case?


Yes. This makes sense. Let me look at this.

Thanks for review
Maddy



We can have a single function for cpu_offline and cpu_online which
implements these checks and sets the cpu bit if required.

ppc_entity_imc_cpu_offline(unsigned int cpu, cpumask_t
   entity_imc_mask,
   entity_imc_change_cpu_context_fn)
{
.
.
.

}


static ppc_nest_imc_cpu_offline(unsigned int cpu)
{
return ppc_entity_imc_cpu_offline(cpu, nest_imc_mask,
  nest_imc_change_cpu_context);
}

And similar ones for core imc and thread imc.

Does this sound reasonable?
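
(Editor's sketch of one concrete shape for the suggested helper, modelled on
the core-IMC offline logic quoted below; the names and the peer-mask
parameter are illustrative.)

static int ppc_entity_imc_cpu_offline(unsigned int cpu,
				      struct cpumask *entity_mask,
				      const struct cpumask *peers,
				      void (*change_ctx)(int old, int new))
{
	unsigned int ncpu;
	int target = -1;

	/* Nothing to do if this cpu was not the designated reader. */
	if (!cpumask_test_and_clear_cpu(cpu, entity_mask))
		return 0;

	/* Designate any other online peer cpu, if one exists. */
	ncpu = cpumask_any_but(peers, cpu);
	if (ncpu < nr_cpu_ids) {
		target = ncpu;
		cpumask_set_cpu(target, entity_mask);
	}

	change_ctx(cpu, target);
	return 0;
}

static int ppc_core_imc_cpu_offline(unsigned int cpu)
{
	return ppc_entity_imc_cpu_offline(cpu, &core_imc_cpumask,
					  cpu_sibling_mask(cpu),
					  core_imc_change_cpu_context);
}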



+   return 0;
+}
+
+static int ppc_core_imc_cpu_offline(unsigned int cpu)
+{
+   int target;
+   unsigned int ncpu;
+
+   /*
+* clear this cpu out of the mask, if not present in the mask,
+* don't bother doing anything.
+*/
+   if (!cpumask_test_and_clear_cpu(cpu, &core_imc_cpumask))
+   return 0;
+
+   /* Find any online cpu in that core except the current "cpu" */
+   ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
+
+   if (ncpu < nr_cpu_ids) {
+   target = ncpu;
+   cpumask_set_cpu(target, &core_imc_cpumask);
+   } else
+   target = -1;
+
+   /* migrate the context */
+   core_imc_change_cpu_context(cpu, target);
+
+   return 0;
+}
+

--
Thanks and Regards
gautham.




Re: [PATCH 1/5] powerpc/smp: use cpu_to_chip_id() to find siblings

2017-03-27 Thread Oliver O'Halloran
On Tue, Mar 28, 2017 at 2:03 PM, Michael Ellerman  wrote:
> Oliver O'Halloran  writes:
>> On Wed, Mar 15, 2017 at 10:18 PM, Michael Ellerman  
>> wrote:
>>> Oliver O'Halloran  writes:
 + /* threads that share a chip-id are considered siblings (same die) */
>>>
>>> Also "Threads" :)
>>
>> The cpus masks are all built in terms of threads, so this is
>> technically correct even if it sounds stupid. Maybe "logical cpus"
>> would be better?
>
> No I meant you need a capital "T" !

capital letters are against my religion.

>
> cheers


Re: [PATCH 01/12] powerpc: Disable HFSCR:TM if TM not supported

2017-03-27 Thread Sam Bobroff
On Mon, Mar 20, 2017 at 05:49:03PM +1100, Benjamin Herrenschmidt wrote:
> Otherwise KVM guests might mess with it even when told not
> to, causing bad interrupts in the host.
> 
> Signed-off-by: Benjamin Herrenschmidt 

I've tested this on a P8, with a kernel and QEMU close to their
respective current master branches, and if:
* the host is configured without CONFIG_PPC_TRANSACTIONAL_MEM,
* and the guest is configured with CONFIG_PPC_TRANSACTIONAL_MEM,
* and the guest runs a program that uses HTM (in my tests, just a loop
  doing some floating point multiplies in a transaction)...

Without the patch the host will OOPS, usually in __kvmppc_vcore_entry,
and kill QEMU. On a busy host this is sometimes followed by "Oops: Bad
kernel stack pointer, sig: 6" and the host dies.

With the patch the userspace test program is killed with a SIGILL. The
guest and host are unaffected.

Cheers,
Sam.
> ---
>  arch/powerpc/kernel/setup_64.c | 10 ++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
> index 9cfaa8b..b372b23 100644
> --- a/arch/powerpc/kernel/setup_64.c
> +++ b/arch/powerpc/kernel/setup_64.c
> @@ -236,6 +236,16 @@ static void cpu_ready_for_interrupts(void)
>   mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
>   }
>  
> + /*
> +  * Fixup HFSCR:TM based on CPU features. The bit is set by our
> +  * early asm init because at that point we haven't updated our
> +  * CPU features from firmware and device-tree. Here we have,
> +  * so let's do it
> +  */
> + if (early_cpu_has_feature(CPU_FTR_HVMODE) &&
> + !early_cpu_has_feature(CPU_FTR_TM_COMP))
> + mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM);
> +
>   /* Set IR and DR in PACA MSR */
>   get_paca()->kernel_msr = MSR_KERNEL;
>  }
> -- 
> 2.9.3



Re: [kernel-hardening] [PATCH v5 1/4] gcc-plugins: Add the initify gcc plugin

2017-03-27 Thread Andrew Donnellan

On 27/03/17 18:38, Andrew Donnellan wrote:

On 01/02/17 07:24, Kees Cook wrote:

From: Emese Revfy 

The kernel already has a mechanism to free up code and data memory that
is only used during kernel or module initialization.  This plugin will
teach the compiler to find more such code and data that can be freed
after initialization.


Currently checking whether we can wire this up for powerpc without too
many problems...


Added "select HAVE_GCC_PLUGIN_INITIFY_INIT_EXIT if GCC_PLUGINS" to 
arch/powerpc/Kconfig and have successfully compiled and booted a modular 
and non-modular powernv_defconfig, it looks like we handle the .exit 
sections correctly.


Could this be folded in for further testing when you get around to v6, Kees?

--
Andrew Donnellan  OzLabs, ADL Canberra
andrew.donnel...@au1.ibm.com  IBM Australia Limited



Re: [PATCH 1/5] powerpc/smp: use cpu_to_chip_id() to find siblings

2017-03-27 Thread Michael Ellerman
Oliver O'Halloran  writes:
> On Wed, Mar 15, 2017 at 10:18 PM, Michael Ellerman  
> wrote:
>> Oliver O'Halloran  writes:
>>> + /* threads that share a chip-id are considered siblings (same die) */
>>
>> Also "Threads" :)
>
> The cpus masks are all built in terms of threads, so this is
> technically correct even if it sounds stupid. Maybe "logical cpus"
> would be better?

No I meant you need a capital "T" !

cheers


Re: [PATCH 2/5] powerpc/smp: add set_cpus_related()

2017-03-27 Thread Michael Ellerman
Oliver O'Halloran  writes:

> On Wed, Mar 15, 2017 at 10:18 PM, Michael Ellerman  
> wrote:
>> Oliver O'Halloran  writes:
>>> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
>>> index dfe0e1d9cd06..1c531887ca51 100644
>>> --- a/arch/powerpc/kernel/smp.c
>>> +++ b/arch/powerpc/kernel/smp.c
>>> @@ -377,6 +377,25 @@ static void smp_store_cpu_info(int id)
>>>  #endif
>>>  }
>>>
>>> +/*
>>> + * Relationships between CPUs are maintained in a set of per-cpu cpumasks. We
>>> + * need to ensure that they are kept consistent between CPUs when they are
>>> + * changed.
>>> + *
>>> + * This is slightly tricky since the core mask must be a strict superset of
>>> + * the sibling mask.
>>> + */
>>> +static void set_cpus_related(int i, int j, bool related, struct cpumask 
>>> *(*relation_fn)(int))
>>> +{
>>> + if (related) {
>>> + cpumask_set_cpu(i, relation_fn(j));
>>> + cpumask_set_cpu(j, relation_fn(i));
>>> + } else {
>>> + cpumask_clear_cpu(i, relation_fn(j));
>>> + cpumask_clear_cpu(j, relation_fn(i));
>>> + }
>>> +}
>>
>> I think you pushed the abstraction one notch too far on this one, or
>> perhaps not far enough.
>>
>> We end up with a function called "set" that might clear, depending on a
>> bool you pass. Which is hard to parse, eg:
>>
>> set_cpus_related(cpu, base + i, false, cpu_sibling_mask);
>>
>> And I know there's two places where we pass an existing bool "add", but
>> there's four where we pass true or false.
>
> I think you're looking at this patch.

Yeah I guess I was.

> With the full series applied we never pass a literal to
> set_cpus_related() directly:
>
> [12:14 oliver ~/.../powerpc/kernel (p9-sched $%)]$ gg set_cpus_related
> smp.c:391:static void set_cpus_related(int i, int j, bool related,
> struct cpumask *(*relation_fn)(int))
> smp.c:647:  set_cpus_related(cpu, cpu, add, cpu_core_mask);
> smp.c:651:  set_cpus_related(cpu, i, add, cpu_core_mask);
> smp.c:685:  set_cpus_related(cpu, cpu, onlining, mask_fn);
> smp.c:697:  set_cpus_related(cpu, i, onlining, mask_fn);
> smp.c:721:  set_cpus_related(cpu, base + i, onlining, 
> cpu_sibling_mask);
> smp.c:736:  set_cpus_related(cpu, cpu, onlining, cpu_core_mask);
> smp.c:746:  set_cpus_related(cpu, i, onlining, cpu_core_mask);

Yeah I guess it's a bit better.

But it's still very non-obvious what each of those lines is doing.

> I agree that set_cpus_related() is probably a bad name,
> make_cpus_related() maybe?

Except in the clearing case it's unrelating them.

I think the fact that we can't think of a meaningful name is a sign it's
not a good API.

>> If we want to push it in that direction I think we should just pass the
>> set/clear routine instead of the flag, so:
>>
>> do_cpus_related(cpu, base + i, cpumask_clear_cpu, cpu_sibling_mask);
>>
>> But that might be overdoing it.
>
> I think this would be ok.
>
>>
>> So I think we should just do:
>>
>> static void set_cpus_related(int i, int j, struct cpumask *(*mask_func)(int))
>> {
>> cpumask_set_cpu(i, mask_func(j));
>> cpumask_set_cpu(j, mask_func(i));
>> }
>>
>> static void clear_cpus_related(int i, int j, struct cpumask 
>> *(*mask_func)(int))
>> {
>> cpumask_clear_cpu(i, mask_func(j));
>> cpumask_clear_cpu(j, mask_func(i));
>> }
>>
>> So the cases with add become:
>>
>> if (add)
>> set_cpus_related(cpu, i, cpu_core_mask(i));
>> else
>> clear_cpus_related(cpu, i, cpu_core_mask(i));
>
> Dunno, I was trying to get rid of this sort of thing since the logic
> is duplicated in a lot of places. Seemed to me that it was just
> pointlessly verbose rather than being helpfully explicit.

It's annoyingly verbose perhaps, but I don't think pointlessly.

Your version passes two CPU numbers, a bool and a function to another
function, and based on the bool calls a third or fourth function and
passes the CPU numbers and result of the first function.

My version(s) above takes two cpu numbers and a mask, and calls one
function passing it the CPU numbers and the mask.

So it's clearly simpler and more explicit, and because the function
names are actually meaningful you can have some hope of determining what
they do without looking at the definition every time you see it.

cheers


Re: [PATCH 4/5] powerpc/smp: add cpu_cache_mask

2017-03-27 Thread Michael Ellerman
Oliver O'Halloran  writes:

> On Wed, Mar 15, 2017 at 10:26 PM, Michael Ellerman  
> wrote:
>> Oliver O'Halloran  writes:
>>
>>> Traditionally we have only ever tracked which CPUs are in the same core
>>> (cpu_sibling_mask) and on the same die (cpu_core_mask). For Power9 we
>>> need to be aware of which CPUs share cache with each other so this patch
>>> adds cpu_cache_mask and the underlying cpu_cache_map variable to track
>>> this.
>
>> Some CPUs on Power8 share L3, or L4.
>
> Eh... it's not really the same. The "L4" is part of the memory buffers
> and its function is conceptually different to the processor caches.
> The L3 on P8 is only shared when the core that owns it is offline (or
> sleeping) so the scheduler doesn't really need to be aware of it.

But that's exactly my point, this mask only tracks whether CPUs share an
L2, so it should be named as such.

>> I think just call it cpu_l2cache_map to make it explicit.
>
> I was being deliberately vague. I know it's only a shared L2 currently,
> but it's possible we might have a (real) shared L3 in the future. The
> latest high-end x86 chips have some L3 sharing across the entire
> chip so you never know.

Sure, but in that case we'd probably want a new mask to track which CPUs
share L3, because we'd probably also have CPUs sharing L2, and those
masks might not be equal.

But if I'm wrong we can just rename it *then*.

> I'm not particularly attached to the name though, so I'll rename it
> if you really want.

Yes please.

cheers


Re: [PATCH 1/2] powerpc: string: implement optimized memset variants

2017-03-27 Thread Michael Ellerman
"Naveen N. Rao"  writes:

> diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
> index 85fa9869aec5..ec531de6 100644
> --- a/arch/powerpc/lib/mem_64.S
> +++ b/arch/powerpc/lib/mem_64.S
> @@ -13,6 +13,23 @@
>  #include 
>  #include 
>  
> +_GLOBAL(__memset16)
> + rlwimi  r4,r4,16,0,15
> + /* fall through */
> +
> +_GLOBAL(__memset32)
> + rldimi  r4,r4,32,0
> + /* fall through */
> +
> +_GLOBAL(__memset64)
> + neg r0,r3
> + andi.   r0,r0,7
> + cmplw   cr1,r5,r0
> + b   .Lms
> +EXPORT_SYMBOL(__memset16)
> +EXPORT_SYMBOL(__memset32)
> +EXPORT_SYMBOL(__memset64)

You'll have to convince me that's better than what GCC produces.

cheers
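
(Editor's note: for readers unfamiliar with the rotate-and-insert idiom in
the quoted asm, a rough C equivalent of the value replication performed by
the fall-through entry points; variable names are illustrative.)

/* __memset16 entry: rlwimi r4,r4,16,0,15 copies the low halfword into
 * the halfword above it, producing a replicated 32-bit value. */
u32 v32 = ((u32)v16 << 16) | v16;

/* __memset32 entry: rldimi r4,r4,32,0 copies the low word into the
 * high word, producing the replicated 64-bit value the store loop uses. */
u64 v64 = ((u64)v32 << 32) | v32;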


Re: [PATCH kernel] KVM: PPC: Preserve storage control bits

2017-03-27 Thread David Gibson
On Fri, Mar 24, 2017 at 05:49:22PM +1100, Alexey Kardashevskiy wrote:
> PR KVM page fault handler performs eaddr to pte translation for a guest,
> however kvmppc_mmu_book3s_64_xlate() does not preserve WIMG bits
> (storage control) in the kvmppc_pte struct. If PR KVM is running as
> a second level guest under HV KVM, and PR KVM tries inserting HPT entry,
> this fails in HV KVM if it already has this mapping.
> 
> This preserves WIMG bits between kvmppc_mmu_book3s_64_xlate() and
> kvmppc_mmu_map_page().
> 
> Signed-off-by: Alexey Kardashevskiy 

Reviewed-by: David Gibson 

> ---
> 
> This allows MMIO BAR mapping for nested guest with VFIO.
> 
> This is the check in HV KVM which failed:
> 
> arch/powerpc/kvm/book3s_hv_rm_mmu.c
> long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
> [...]
> 
> /* If we had host pte mapping then check WIMG */
> if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) {
> if (is_ci)
> return H_PARAMETER;
> /*
>  * Allow guest to map emulated device memory as
>  * uncacheable, but actually make it cacheable.
>  */
> ptel &= ~(HPTE_R_W|HPTE_R_I|HPTE_R_G);
> ptel |= HPTE_R_M;
> }
> ---
>  arch/powerpc/include/asm/kvm_host.h   | 1 +
>  arch/powerpc/kvm/book3s_64_mmu.c  | 1 +
>  arch/powerpc/kvm/book3s_64_mmu_host.c | 2 ++
>  arch/powerpc/kvm/book3s_pr.c  | 2 +-
>  4 files changed, 5 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_host.h 
> b/arch/powerpc/include/asm/kvm_host.h
> index 7bba8f415627..bf6822cd4f86 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -345,6 +345,7 @@ struct kvmppc_pte {
>   bool may_read   : 1;
>   bool may_write  : 1;
>   bool may_execute: 1;
> + unsigned long wimg;
>   u8 page_size;   /* MMU_PAGE_xxx */
>  };
>  
> diff --git a/arch/powerpc/kvm/book3s_64_mmu.c 
> b/arch/powerpc/kvm/book3s_64_mmu.c
> index 70153578131a..29ebe2fd5867 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu.c
> @@ -319,6 +319,7 @@ static int kvmppc_mmu_book3s_64_xlate(struct kvm_vcpu 
> *vcpu, gva_t eaddr,
>   gpte->may_execute = true;
>   gpte->may_read = false;
>   gpte->may_write = false;
> + gpte->wimg = r & HPTE_R_WIMG;
>  
>   switch (pp) {
>   case 0:
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c 
> b/arch/powerpc/kvm/book3s_64_mmu_host.c
> index 4b4e927c4822..145a61892c48 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_host.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
> @@ -145,6 +145,8 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct 
> kvmppc_pte *orig_pte,
>   else
>   kvmppc_mmu_flush_icache(pfn);
>  
> + rflags = (rflags & ~HPTE_R_WIMG) | orig_pte->wimg;
> +
>   /*
>* Use 64K pages if possible; otherwise, on 64K page kernels,
>* we need to transfer 4 more bits from guest real to host real addr.
> diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
> index ce437b98477e..f026b062c0ed 100644
> --- a/arch/powerpc/kvm/book3s_pr.c
> +++ b/arch/powerpc/kvm/book3s_pr.c
> @@ -537,7 +537,7 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct 
> kvm_vcpu *vcpu,
>   int r = RESUME_GUEST;
>   int relocated;
>   int page_found = 0;
> - struct kvmppc_pte pte;
> + struct kvmppc_pte pte = { 0 };
>   bool dr = (kvmppc_get_msr(vcpu) & MSR_DR) ? true : false;
>   bool ir = (kvmppc_get_msr(vcpu) & MSR_IR) ? true : false;
>   u64 vsid;

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson




Re: [PATCH net-next] net: ibmvnic: Remove unused net_stats member from struct ibmvnic_adapter

2017-03-27 Thread David Miller
From: Tobias Klauser 
Date: Mon, 27 Mar 2017 08:56:59 +0200

> The ibmvnic driver keeps its statistics in net_device->stats, so the
> net_stats member in struct ibmvnic_adapter is unused. Remove it.
> 
> Signed-off-by: Tobias Klauser 

Applied.


Re: [PATCH net-next] net: ibmveth: Remove unused stats member from struct ibmveth_adapter

2017-03-27 Thread David Miller
From: Tobias Klauser 
Date: Mon, 27 Mar 2017 08:56:15 +0200

> The ibmveth driver keeps its statistics in net_device->stats, so the
> stats member in struct ibmveth_adapter is unused. Remove it.
> 
> Signed-off-by: Tobias Klauser 

Applied.


[PATCH 2/2] powerpc: bpf: use memset32() to pre-fill traps in BPF page(s)

2017-03-27 Thread Naveen N. Rao
Use the newly introduced memset32() to pre-fill BPF page(s) with trap
instructions.

Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/net/bpf_jit_comp64.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit_comp64.c 
b/arch/powerpc/net/bpf_jit_comp64.c
index aee2bb817ac6..1a92ab6c245f 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -25,11 +25,7 @@ int bpf_jit_enable __read_mostly;
 
 static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
 {
-   int *p = area;
-
-   /* Fill whole space with trap instructions */
-   while (p < (int *)((char *)area + size))
-   *p++ = BREAKPOINT_INSTRUCTION;
+   memset32(area, BREAKPOINT_INSTRUCTION, size/4);
 }
 
 static inline void bpf_flush_icache(void *start, void *end)
-- 
2.11.1



[PATCH 1/2] powerpc: string: implement optimized memset variants

2017-03-27 Thread Naveen N. Rao
Based on Matthew Wilcox's patches for other architectures.

Signed-off-by: Naveen N. Rao 
---
 arch/powerpc/include/asm/string.h | 24 
 arch/powerpc/lib/mem_64.S | 19 ++-
 2 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/string.h 
b/arch/powerpc/include/asm/string.h
index da3cdffca440..b0e82466d4e8 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -23,6 +23,30 @@ extern void * memmove(void *,const void *,__kernel_size_t);
 extern int memcmp(const void *,const void *,__kernel_size_t);
 extern void * memchr(const void *,int,__kernel_size_t);
 
+#ifdef CONFIG_PPC64
+#define __HAVE_ARCH_MEMSET16
+#define __HAVE_ARCH_MEMSET32
+#define __HAVE_ARCH_MEMSET64
+
+extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
+extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
+extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t);
+
+static inline void *memset16(uint16_t *p, uint16_t v, __kernel_size_t n)
+{
+   return __memset16(p, v, n * 2);
+}
+
+static inline void *memset32(uint32_t *p, uint32_t v, __kernel_size_t n)
+{
+   return __memset32(p, v, n * 4);
+}
+
+static inline void *memset64(uint64_t *p, uint64_t v, __kernel_size_t n)
+{
+   return __memset64(p, v, n * 8);
+}
+#endif
 #endif /* __KERNEL__ */
 
 #endif /* _ASM_POWERPC_STRING_H */
diff --git a/arch/powerpc/lib/mem_64.S b/arch/powerpc/lib/mem_64.S
index 85fa9869aec5..ec531de6 100644
--- a/arch/powerpc/lib/mem_64.S
+++ b/arch/powerpc/lib/mem_64.S
@@ -13,6 +13,23 @@
 #include 
 #include 
 
+_GLOBAL(__memset16)
+   rlwimi  r4,r4,16,0,15
+   /* fall through */
+
+_GLOBAL(__memset32)
+   rldimi  r4,r4,32,0
+   /* fall through */
+
+_GLOBAL(__memset64)
+   neg r0,r3
+   andi.   r0,r0,7
+   cmplw   cr1,r5,r0
+   b   .Lms
+EXPORT_SYMBOL(__memset16)
+EXPORT_SYMBOL(__memset32)
+EXPORT_SYMBOL(__memset64)
+
 _GLOBAL(memset)
neg r0,r3
rlwimi  r4,r4,8,16,23
@@ -20,7 +37,7 @@ _GLOBAL(memset)
rlwimi  r4,r4,16,0,15
cmplw   cr1,r5,r0   /* do we get that far? */
rldimi  r4,r4,32,0
-   PPC_MTOCRF(1,r0)
+.Lms:  PPC_MTOCRF(1,r0)
mr  r6,r3
blt cr1,8f
beq+3f  /* if already 8-byte aligned */
-- 
2.11.1
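
(Editor's note: a minimal usage sketch for the new helpers, to show that the
counts are in elements rather than bytes; the caller is hypothetical.)

#include <linux/string.h>

/* Hypothetical caller: fill a buffer with a repeating 32-bit pattern.
 * memset32() takes the count in u32 elements, hence the division. */
static void fill_pattern32(u32 *buf, size_t bytes)
{
	memset32(buf, 0x7fe00008 /* "tw 31,0,0" trap */, bytes / sizeof(u32));
}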



Re: Optimised memset64/memset32 for powerpc

2017-03-27 Thread Naveen N. Rao
On 2017/03/22 12:30PM, Matthew Wilcox wrote:
> On Wed, Mar 22, 2017 at 06:18:05AM -0700, Matthew Wilcox wrote:
> > There's one other potential user I've been wondering about, which are the
> > various console drivers.  They use 'memsetw' to blank the entire console
> > or lines of the console when scrolling, but the only architecture which
> > ever bothered implementing an optimised version of it was Alpha.
> > 
> > Might be worth it on powerpc actually ... better than a loop calling
> > cpu_to_le16() on each iteration.  That'd complete the set with a
> > memset16().
> 
> All hail plane rides ... This would need to be resplit and merged properly,
> but I think it makes life a little saner.

... not to forget train rides :)

Here's a straight-forward implementation for powerpc64, along with one
other user in bpf. It is obviously non-critical, but given that we have
64K pages on powerpc64, it does help to speed up the BPF JIT.

- Naveen

Naveen N. Rao (2):
  powerpc: string: implement optimized memset variants
  powerpc: bpf: use memset32() to pre-fill traps in BPF page(s)

 arch/powerpc/include/asm/string.h | 24 
 arch/powerpc/lib/mem_64.S | 19 ++-
 arch/powerpc/net/bpf_jit_comp64.c |  6 +-
 3 files changed, 43 insertions(+), 6 deletions(-)

-- 
2.11.1



[PATCH v2] powerpc: make /proc/self/stack always print the current stack

2017-03-27 Thread Thadeu Lima de Souza Cascardo
For the current task, the saved kernel stack only shows where the
process was last rescheduled, if ever. Use the current stack pointer
for the current task instead.

Otherwise, every once in a while, the stacktrace printed when reading
/proc/self/stack would look like the process is running in userspace
while it's not, which some may consider a bug.

This is also consistent with some other architectures, like x86 and arm,
at least.

Signed-off-by: Thadeu Lima de Souza Cascardo 
---
 arch/powerpc/kernel/stacktrace.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c
index 6671195..d534ed9 100644
--- a/arch/powerpc/kernel/stacktrace.c
+++ b/arch/powerpc/kernel/stacktrace.c
@@ -59,7 +59,14 @@ EXPORT_SYMBOL_GPL(save_stack_trace);
 
 void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
 {
-   save_context_stack(trace, tsk->thread.ksp, tsk, 0);
+   unsigned long sp;
+
+   if (tsk == current)
+   sp = current_stack_pointer();
+   else
+   sp = tsk->thread.ksp;
+
+   save_context_stack(trace, sp, tsk, 0);
 }
 EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
 
-- 
2.9.3



Re: Build failure -- powerpc/boot: Add OPAL console to epapr wrappers

2017-03-27 Thread Daniel Walker

On 03/24/2017 07:16 PM, Oliver O'Halloran wrote:

On Sat, Mar 25, 2017 at 4:00 AM, Daniel Walker  wrote:

I get this build failure,


In file included from arch/powerpc/boot/fdt.c:51:
../arch/powerpc/boot/libfdt_env.h:9: error: redefinition of typedef
'uint32_t'
../arch/powerpc/boot/types.h:20: note: previous declaration of 'uint32_t'
was here
../arch/powerpc/boot/libfdt_env.h:10: error: redefinition of typedef
'uint64_t'
../arch/powerpc/boot/types.h:21: note: previous declaration of 'uint64_t'
was here
make[2]: *** [arch/powerpc/boot/fdt.o] Error 1
make[1]: *** [uImage] Error 2
make[1]: Leaving directory `/nobackup/danielwa/linux/t1040'
make: *** [sub-make] Error 2


and it bisects to ,


commit 656ad58ef19e2a763fa5c938b20ae0f6b8d67242
Author: Oliver O'Halloran 
Date:   Fri Jul 1 00:34:37 2016 +1000

 powerpc/boot: Add OPAL console to epapr wrappers

 This patch adds an OPAL console backend to the powerpc boot wrapper so
 that decompression failures inside the wrapper can be reported to the
 user. This is important since it typically indicates data corruption in
 the firmware and other nasty things.

 Currently this only works when building a little endian kernel. When
 compiling a 64 bit BE kernel the wrapper is always built 32 bit to be
 compatible with some 32 bit firmwares. BE support will be added at a
 later date. Another limitation of this is that only the "raw" type of
 OPAL console is supported, however machines that provide a hvsi console
 also provide a raw console so this is not an issue in practice.

 Actually-written-by: Benjamin Herrenschmidt 
 Signed-off-by: Oliver O'Halloran 
 [mpe: Move #ifdef __powerpc64__ to avoid warnings on 32-bit]
 Signed-off-by: Michael Ellerman 


I can provide a config file if needed. My apologies if this was already
reported.

Thanks for the report, I don't think this is a known bug. mpe's build
testing is pretty thorough so I'm surprised this wasn't caught sooner.

A config file and the version of gcc that you're using would be useful.

Oliver



Config attached, it's for a Freescale t1042 machine. The GCC is custom,
based on 4.4.1.
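
(Editor's note: a minimal illustration of the suspected failure mode: GCC 4.4
predates the C11 rule that permits repeating a typedef, so including both
headers trips the error.)

/* arch/powerpc/boot/types.h */
typedef unsigned int uint32_t;

/* arch/powerpc/boot/libfdt_env.h -- a second, identical typedef:
 * an error under gnu89/GCC 4.4, accepted by C11-aware compilers. */
typedef unsigned int uint32_t;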



Daniel



t1042.config.gz
Description: application/gzip


tty/hvc: possible irq lock inversion dependency detected

2017-03-27 Thread Denis Kirjanov
Hi,

Ok, now we have a new locking issue with interrupts which I found on my P8 box.
I haven't found where we take the console_lock, only in hvc_remove.

BTW, Ben, it looks like a bug in hvc_remove:
we take the spin lock and then the semaphore lock.
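
(Editor's note: a schematic of the ordering being described, reconstructed
from the lockdep report below; this is not the actual driver source.)

/*
 * hvc_remove() path (process context):
 *
 *	spin_lock_irqsave(&hp->lock, flags);
 *	...
 *	console_lock();		// sleeping lock taken under a spinlock
 *
 * interrupt path:
 *
 *	hvc_handle_interrupt() -> hvc_poll() -> spin_lock(&hp->lock);
 *
 * Combined, the two orderings can deadlock, as the CPU0/CPU1 diagram
 * in the splat below shows.
 */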

[178327.461954] =
[178327.462043] [ INFO: possible irq lock inversion dependency detected ]
[178327.462105] 4.11.0-rc3-00022-gc7e790c #1 Not tainted
[178327.462154] -
[178327.462216] swapper/0/0 just changed the state of lock:
[178327.462266]  (&(&hp->lock)->rlock){-.}, at:
[] .hvc_poll+0x4c/0x380
[178327.462348] but this lock took another, HARDIRQ-unsafe lock in the past:
[178327.462409]  (console_lock){+.+.+.}
[178327.462414]

and interrupts could create inverse lock ordering between them.

[178327.462559]
other info that might help us debug this:
[178327.462621]  Possible interrupt unsafe locking scenario:

[178327.462682]CPU0CPU1
[178327.462731]
[178327.462780]   lock(console_lock);
[178327.462820]local_irq_disable();
[178327.462880]lock(&(&hp->lock)->rlock);
[178327.462945]lock(console_lock);
[178327.463008]   
[178327.463034] lock(&(&hp->lock)->rlock);
[178327.463074]
 *** DEADLOCK ***

[178327.463137] no locks held by swapper/0/0.
[178327.463174]
the shortest dependencies between 2nd lock and 1st lock:
[178327.463248]  -> (console_lock){+.+.+.} ops: 6747393622016 {
[178327.463316] HARDIRQ-ON-W at:
[178327.463359]   .lock_acquire+0xf0/0x340
[178327.463410]   .console_lock+0x60/0xa0
[178327.463463]   .con_init+0x24/0x2d0
[178327.463514]   .console_init+0x4c/0x6c
[178327.463566]   .start_kernel+0x3d4/0x584
[178327.463629]   start_here_common+0x1c/0x520
[178327.463690] SOFTIRQ-ON-W at:
[178327.463731]   .lock_acquire+0xf0/0x340
[178327.463782]   .console_lock+0x60/0xa0
[178327.463833]   .con_init+0x24/0x2d0
[178327.463884]   .console_init+0x4c/0x6c
[178327.463935]   .start_kernel+0x3d4/0x584
[178327.463998]   start_here_common+0x1c/0x520
[178327.464059] RECLAIM_FS-ON-W at:
[178327.464100]  .lockdep_trace_alloc+0xa8/0x180
[178327.464163]  .kmem_cache_alloc_trace+0x5c/0x4f0
[178327.464227]  .device_create_groups_vargs+0x74/0x1b0
[178327.464301]  .device_create+0x2c/0x40
[178327.464364]  .fb_console_init+0x64/0x1cc
[178327.464426]  .do_one_initcall+0x5c/0x1c0
[178327.464489]  .kernel_init_freeable+0x2d4/0x3ac
[178327.464551]  .kernel_init+0x1c/0x130
[178327.464614]  .ret_from_kernel_thread+0x58/0x70
[178327.464674] INITIAL USE at:
[178327.464715]  0x1a492c0
[178327.464754]  0x1a58870
[178327.464792]  0x1a5b94c
[178327.464830]  0x2781bcc
[178327.464869]  0x2782108
[178327.464907]  0x191b448
[178327.464945]   }
[178327.464973]   ... key  at: []
console_lock_dep_map+0x0/0x30
[178327.465046]   ... acquired at:
[178327.465085].console_lock+0x60/0xa0
[178327.465124].hvc_remove+0xa8/0xf0
[178327.465163].hvc_opal_remove+0x28/0x90
[178327.465201].platform_drv_remove+0x44/0x80
[178327.465251].driver_probe_device+0x144/0x4d0
[178327.465302].__driver_attach+0x10c/0x110
[178327.465352].bus_for_each_dev+0x84/0xf0
[178327.465391].driver_attach+0x24/0x40
[178327.465429].bus_add_driver+0x278/0x300
[178327.465468].driver_register+0x8c/0x170
[178327.465507].__platform_driver_register+0x48/0x60
[178327.465557].hvc_opal_init+0x30/0x4c
[178327.465596].do_one_initcall+0x5c/0x1c0
[178327.465635].kernel_init_freeable+0x2d4/0x3ac
[178327.465685].kernel_init+0x1c/0x130
[178327.465725].ret_from_kernel_thread+0x58/0x70

[178327.465799] -> (&(&hp->lock)->rlock){-.} ops: 8160437862400 {
[178327.465867]IN-HARDIRQ-W at:
[178327.465908] .lock_acquire+0xf0/0x340
[178327.465960] ._raw_spin_lock_irqsave+0x68/0xd0
[178327.466022] .hvc_poll+0x4c/0x380
[178327.466074] .hvc_handle_interrupt+0x14/0x40
[178327.466136] .__handle_irq_event_percpu+0xe4/0x630
[178327.466199] .handle_irq_event_percpu+0x28/0x80
[178327.466262] .handle_irq_event+0x54/0xa0
[178327.466324] .handle_level_irq+0xf8/0x1f0
[178327.466387]  

Re: [v3 PATCH 2/4] powernv:smp: Add busy-wait loop as fall back for CPU-Hotplug

2017-03-27 Thread Michael Ellerman
"Gautham R. Shenoy"  writes:

> diff --git a/arch/powerpc/platforms/powernv/idle.c 
> b/arch/powerpc/platforms/powernv/idle.c
> index 419edff..f335e0f 100644
> --- a/arch/powerpc/platforms/powernv/idle.c
> +++ b/arch/powerpc/platforms/powernv/idle.c
> @@ -283,8 +283,16 @@ unsigned long pnv_cpu_offline(unsigned int cpu)
>   } else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
>  (idle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
>   srr1 = power7_sleep();
> - } else {
> + } else if (idle_states & OPAL_PM_NAP_ENABLED) {
>   srr1 = power7_nap(1);
> + } else {
> + /* This is the fallback method. We emulate snooze */
> + while (!generic_check_cpu_restart(cpu)) {

Breaks the SMP=n build :/

arch/powerpc/platforms/powernv/idle.c:299:11: error: implicit declaration of 
function 'generic_check_cpu_restart' [-Werror=implicit-function-declaration]
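
(Editor's sketch of one way to keep the SMP=n build happy; illustrative only,
and not necessarily the eventual fix. Since CPU hotplug requires SMP, this
path can never actually run on UP.)

#ifndef CONFIG_SMP
/* Stub so the call site compiles; the offline path is unreachable on UP. */
static inline int generic_check_cpu_restart(unsigned int cpu)
{
	return 1;
}
#endif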

cheers


Re: [v7] powerpc/powernv: add hdat attribute to sysfs

2017-03-27 Thread Michael Ellerman
Hi Matt,

Sorry if you're getting sick of this at v7, but here's a few more
comments :D

Matt Brown  writes:

> The HDAT data area is consumed by skiboot and turned into a device-tree.
> In some cases we would like to look directly at the HDAT, so this patch
> adds a sysfs node to allow it to be viewed.  This is not possible through
> /dev/mem as it is reserved memory which is stopped by the /dev/mem filter.

You need to update the change log now that the patch has expanded in its
purpose.


> diff --git a/arch/powerpc/platforms/powernv/opal.c 
> b/arch/powerpc/platforms/powernv/opal.c
> index 2822935..b8f057f 100644
> --- a/arch/powerpc/platforms/powernv/opal.c
> +++ b/arch/powerpc/platforms/powernv/opal.c
> @@ -604,6 +604,87 @@ static void opal_export_symmap(void)
>   pr_warn("Error %d creating OPAL symbols file\n", rc);
>  }
>  
> +static ssize_t export_attr_read(struct file *fp, struct kobject *kobj,
> +  struct bin_attribute *bin_attr, char *buf,
> +  loff_t off, size_t count)
> +{
> + return memory_read_from_buffer(buf, count, &off, bin_attr->private,
> +bin_attr->size);
> +}
> +
> +/*
> + * opal_export_attrs: creates a sysfs node for each property listed in
> + * the device-tree under /ibm,opal/firmware/exports/
> + * All new sysfs nodes are created under /opal/exports/.
> + * This allows for reserved memory regions (e.g. HDAT) to be read.
> + * The new sysfs nodes are only readable by root.
> + */
> +static void opal_export_attrs(void)
> +{
> + /* /sys/firmware/opal/exports */
> + struct kobject *opal_export_kobj;
> + struct bin_attribute *exported_attrs;
> + char **attr_name;
> +

You should pretty much never have a blank line in your variable
declarations.

> + struct bin_attribute *attr_tmp;
> + const __be64 *syms;
> + unsigned int size;
> + struct device_node *fw;

It's traditional to call these "np" for "node pointer" (or less commonly "dn").

> + struct property *prop;
> + int rc;
> + int attr_count = 0;
> + int n = 0;

It's usually better to delay initialisation of variables until when
they're needed, so in this case just prior to the loop that uses them.
Also notice attr_count is only used prior to the use of n, so you could
just use n for both. Though see below.

You should also group common types on one line when possible. And I and
some other folks with good taste like the longest variable lines first,
followed by the shorter ones, so in this case eg:

struct bin_attribute *exported_attrs, *attr_tmp;
struct kobject *opal_export_kobj;
struct device_node *np;
struct property *prop;
int rc, attr_count, n;
const __be64 *syms;
unsigned int size;
char **attr_name;

> +
> + /* Create new 'exports' directory */
> + opal_export_kobj = kobject_create_and_add("exports", opal_kobj);

Now that this is a local it doesn't need such a verbose name, it could
just be kobj.

> + if (!opal_export_kobj) {
> + pr_warn("kobject_create_and_add opal_exports failed\n");
> + return;
> + }
> +
> + fw = of_find_node_by_path("/ibm,opal/firmware/exports");
> + if (!fw)
> + return;
> +
> + for (prop = fw->properties; prop != NULL; prop = prop->next)
> + attr_count++;

So here we count the properties of the node.

> + if (attr_count > 2) {
> + exported_attrs = kzalloc(sizeof(exported_attrs)*(attr_count-2),
> + GFP_KERNEL);
> + attr_name = kzalloc(sizeof(char *)*(attr_count-2), GFP_KERNEL);

And here we alloc space for that number - 2.

> + }
> +
> + for_each_property_of_node(fw, prop) {

But this loop does no checking vs attr_count. I know you have the check
for "name" and "phandle" below, but if ever you didn't have those
properties this loop would overflow the end of exported_attrs and
corrupt something else on the heap.

> + attr_name[n] = kstrdup(prop->name, GFP_KERNEL);

Here we allocate memory for the property name ...

> + syms = of_get_property(fw, attr_name[n], &size);
> +
> + if (!strcmp(attr_name[n], "name") ||
> + !strcmp(attr_name[n], "phandle"))
> + continue;

But then here we don't free the attr_name we just kstrdup'ed.

>
> + if (!syms || size != 2 * sizeof(__be64))
> + continue;

And the same here, plus we've now allocated an extra bin_attr in
exported_attrs that we'll never use.
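
(Editor's sketch of a loop shape that addresses the three comments above,
reusing the patch's variable names; illustrative only.)

	for_each_property_of_node(np, prop) {
		/* Do the skip checks before allocating, so nothing leaks. */
		if (!strcmp(prop->name, "name") ||
		    !strcmp(prop->name, "phandle"))
			continue;

		syms = of_get_property(np, prop->name, &size);
		if (!syms || size != 2 * sizeof(__be64))
			continue;

		if (n >= attr_count)	/* never overflow exported_attrs */
			break;

		attr_name[n] = kstrdup(prop->name, GFP_KERNEL);
		if (!attr_name[n])
			continue;

		attr_tmp = &exported_attrs[n];
		attr_tmp->attr.name = attr_name[n];
		attr_tmp->attr.mode = 0400;
		attr_tmp->read = export_attr_read;
		attr_tmp->private = __va(be64_to_cpu(syms[0]));
		attr_tmp->size = be64_to_cpu(syms[1]);
		n++;
	}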


> + syms = of_get_property(fw, attr_name[n], &size);
...
> + if (!syms || size != 2 * sizeof(__be64))
> + continue;
...
> + attr_tmp->private = __va(be64_to_cpu(syms[0]));
> + attr_tmp->size = be64_to_cpu(syms[1]);

The actual reading of the property can be done in a cleaner way using
one of the helpers.

Re: [RESEND PATCH v4 5/5] powerpc/fadump: update documentation about crashkernel parameter reuse

2017-03-27 Thread Michael Ellerman
Hari Bathini  writes:

> As we are reusing crashkernel parameter instead of fadump_reserve_mem
> parameter to specify the memory to reserve for fadump's crash kernel,
> update the documentation accordingly.
>
> Signed-off-by: Hari Bathini 
> ---
>  Documentation/powerpc/firmware-assisted-dump.txt |   23 
> ++
>  1 file changed, 15 insertions(+), 8 deletions(-)

Acked-by: Michael Ellerman 

cheers


Re: [RESEND PATCH v4 4/5] powerpc/fadump: reuse crashkernel parameter for fadump memory reservation

2017-03-27 Thread Michael Ellerman
Hari Bathini  writes:

> fadump supports specifying memory to reserve for fadump's crash kernel
> with fadump_reserve_mem kernel parameter. This parameter currently
> supports passing a fixed memory size, like fadump_reserve_mem=<size>
> only. This patch aims to add support for other syntaxes like range-based
> memory size <range>:<size>[,<range>:<size>,<range>:<size>,...]
> which allows using the same parameter to boot the kernel with different
> system RAM sizes.
>
> As crashkernel parameter already supports the above mentioned syntaxes,
> this patch deprecates fadump_reserve_mem parameter and reuses crashkernel
> parameter instead, to specify memory for fadump's crash kernel memory
> reservation as well. If any offset is provided in crashkernel parameter,
> it will be ignored in case of fadump, as fadump reserves memory at end
> of RAM.
>
> Advantages of using the crashkernel parameter instead of the
> fadump_reserve_mem parameter are one less kernel parameter overall, code
> reuse and support for multiple syntaxes to specify memory.
>
> Suggested-by: Dave Young 
> Signed-off-by: Hari Bathini 
> Reviewed-by: Mahesh Salgaonkar 
> ---
>  arch/powerpc/kernel/fadump.c |   23 ++-
>  1 file changed, 10 insertions(+), 13 deletions(-)

Acked-by: Michael Ellerman 

cheers


Re: [RESEND PATCH v4 3/5] powerpc/fadump: remove dependency with CONFIG_KEXEC

2017-03-27 Thread Michael Ellerman
Hari Bathini  writes:

> Now that crashkernel parameter parsing and vmcoreinfo related code is
> moved under CONFIG_CRASH_CORE instead of CONFIG_KEXEC_CORE, remove
> dependency with CONFIG_KEXEC for CONFIG_FA_DUMP. While here, get rid
> of definitions of fadump_append_elf_note() & fadump_final_note()
> functions to reuse similar functions compiled under CONFIG_CRASH_CORE.
>
> Signed-off-by: Hari Bathini 
> Reviewed-by: Mahesh Salgaonkar 
> ---
>  arch/powerpc/Kconfig   |   10 ++
>  arch/powerpc/include/asm/fadump.h  |2 ++
>  arch/powerpc/kernel/crash.c|2 --
>  arch/powerpc/kernel/fadump.c   |   34 +++---
>  arch/powerpc/kernel/setup-common.c |5 +
>  5 files changed, 16 insertions(+), 37 deletions(-)

Acked-by: Michael Ellerman 

cheers


Re: [PATCH v2 1/2] powerpc/powernv/cpuidle: Pass correct drv->cpumask for registration

2017-03-27 Thread Michael Ellerman
"Rafael J. Wysocki"  writes:

> On Thu, Mar 23, 2017 at 4:22 PM, Vaidyanathan Srinivasan
>  wrote:
>> drv->cpumask defaults to cpu_possible_mask in __cpuidle_driver_init().
>> On PowerNV platform cpu_present could be less than cpu_possible in cases
>> where firmware detects the cpu, but it is not available to the OS.  When
>> CONFIG_HOTPLUG_CPU=n, such cpus are not hotpluggable at runtime and hence
>> we skip creating cpu_device.
>>
>> This breaks cpuidle on powernv where register_cpu() is not called for
>> cpus in cpu_possible_mask that cannot be hot-added at runtime.
>>
>> Trying cpuidle_register_device() on a cpu without a cpu_device will cause
>> a crash like this:
>>
>> cpu 0xf: Vector: 380 (Data SLB Access) at [c00ff1503490]
>> pc: c022c8bc: string+0x34/0x60
>> lr: c022ed78: vsnprintf+0x284/0x42c
>> sp: c00ff1503710
>>msr: 90009033
>>dar: 60006000
>>   current = 0xc00ff148
>>   paca= 0xcfe82d00   softe: 0irq_happened: 0x01
>> pid   = 1, comm = swapper/8
>> Linux version 4.11.0-rc2 (sv@sagarika) (gcc version 4.9.4
>> (Buildroot 2017.02-4-gc28573e) ) #15 SMP Fri Mar 17 19:32:02 IST 2017
>> enter ? for help
>> [link register   ] c022ed78 vsnprintf+0x284/0x42c
>> [c00ff1503710] c022ebb8 vsnprintf+0xc4/0x42c (unreliable)
>> [c00ff1503800] c022ef40 vscnprintf+0x20/0x44
>> [c00ff1503830] c00ab61c vprintk_emit+0x94/0x2cc
>> [c00ff15038a0] c00acc9c vprintk_func+0x60/0x74
>> [c00ff15038c0] c0619694 printk+0x38/0x4c
>> [c00ff15038e0] c0224950 kobject_get+0x40/0x60
>> [c00ff1503950] c022507c kobject_add_internal+0x60/0x2c4
>> [c00ff15039e0] c0225350 kobject_init_and_add+0x70/0x78
>> [c00ff1503a60] c053c288 cpuidle_add_sysfs+0x9c/0xe0
>> [c00ff1503ae0] c053aeac cpuidle_register_device+0xd4/0x12c
>> [c00ff1503b30] c053b108 cpuidle_register+0x98/0xcc
>> [c00ff1503bc0] c085eaf0 powernv_processor_idle_init+0x140/0x1e0
>> [c00ff1503c60] c000cd60 do_one_initcall+0xc0/0x15c
>> [c00ff1503d20] c0833e84 kernel_init_freeable+0x1a0/0x25c
>> [c00ff1503dc0] c000d478 kernel_init+0x24/0x12c
>> [c00ff1503e30] c000b564 ret_from_kernel_thread+0x5c/0x78
>>
>> This patch fixes the bug by passing the correct cpumask from the
>> powernv-cpuidle driver.
>>
>> Signed-off-by: Vaidyanathan Srinivasan 
>
> That needs to be ACKed by someone familiar with powernv.

Acked-by: Michael Ellerman 

cheers
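
(Editor's note: a minimal sketch of the kind of fix the changelog describes,
assuming the driver fills in cpuidle_driver.cpumask before registration; the
merged patch may differ.)

	/* Register idle states only for CPUs that actually exist, rather
	 * than the cpu_possible_mask default from __cpuidle_driver_init(). */
	drv->cpumask = (struct cpumask *)cpu_present_mask;
	return cpuidle_register(drv, NULL);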


Re: [PATCH 12/13] powerpc/perf: Thread imc cpuhotplug support

2017-03-27 Thread Anju T Sudhakar



On Thursday 23 March 2017 10:45 PM, Gautham R Shenoy wrote:

Hi Maddy, Anju,

On Thu, Mar 16, 2017 at 01:05:06PM +0530, Madhavan Srinivasan wrote:

From: Anju T Sudhakar 

This patch adds support for thread IMC on cpuhotplug.

When a cpu goes offline, the LDBAR for that cpu is disabled, and when it comes
back online the previous LDBAR value is written back to the LDBAR for that cpu.

To register the hotplug functions for thread_imc, a new state
CPUHP_AP_PERF_POWERPC_THREADIMC_ONLINE is added to the list of existing
states.

Cc: Gautham R. Shenoy 
Cc: Balbir Singh 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Anton Blanchard 
Cc: Sukadev Bhattiprolu 
Cc: Michael Neuling 
Cc: Stewart Smith 
Cc: Daniel Axtens 
Cc: Stephane Eranian 
Signed-off-by: Anju T Sudhakar 
Signed-off-by: Madhavan Srinivasan 
---
  arch/powerpc/perf/imc-pmu.c | 33 -
  include/linux/cpuhotplug.h  |  1 +
  2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 6802960db51c..2ff39fe2a5ce 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -687,6 +687,16 @@ static void cleanup_all_thread_imc_memory(void)
on_each_cpu(cleanup_thread_imc_memory, NULL, 1);
  }

+static void thread_imc_update_ldbar(unsigned int cpu_id)
+{
+   u64 ldbar_addr, ldbar_value;
+
+   ldbar_addr = (u64)virt_to_phys((void *)per_cpu_add[cpu_id]);
+   ldbar_value = (ldbar_addr & (u64)THREAD_IMC_LDBAR_MASK) |
+   (u64)THREAD_IMC_ENABLE;
+   mtspr(SPRN_LDBAR, ldbar_value);
+}
+
  /*
   * Allocates a page of memory for each of the online cpus, and writes the
   * physical base address of that page to the LDBAR for that cpu. This starts
@@ -694,20 +704,33 @@ static void cleanup_all_thread_imc_memory(void)
   */
  static void thread_imc_mem_alloc(void *dummy)
  {
-   u64 ldbar_addr, ldbar_value;
int cpu_id = smp_processor_id();

per_cpu_add[cpu_id] = (u64)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
0);
-   ldbar_addr = (u64)virt_to_phys((void *)per_cpu_add[cpu_id]);
-   ldbar_value = (ldbar_addr & (u64)THREAD_IMC_LDBAR_MASK) |
-   (u64)THREAD_IMC_ENABLE;
-   mtspr(SPRN_LDBAR, ldbar_value);
+   thread_imc_update_ldbar(cpu_id);
+}
+
+static int ppc_thread_imc_cpu_online(unsigned int cpu)
+{
+   thread_imc_update_ldbar(cpu);
+   return 0;
+
+}
+
+static int ppc_thread_imc_cpu_offline(unsigned int cpu)
+{
+   mtspr(SPRN_LDBAR, 0);
+   return 0;
  }

This patch looks ok to me.

So it appears that in case of a full-core deep stop entry/exit you
will need to save/restore LDBAR as well. But I will take it up for the
next set of stop cleanups.

For this patch,
Reviewed-by: Gautham R. Shenoy 


Thank you for reviewing the patch, Gautham.

-Anju




  void thread_imc_cpu_init(void)
  {
on_each_cpu(thread_imc_mem_alloc, NULL, 1);
+   cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_THREADIMC_ONLINE,
+ "POWER_THREAD_IMC_ONLINE",
+  ppc_thread_imc_cpu_online,
+  ppc_thread_imc_cpu_offline);
  }

  static void thread_imc_ldbar_disable(void *dummy)
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index abde85d9511a..724df46b2c3c 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -138,6 +138,7 @@ enum cpuhp_state {
CPUHP_AP_PERF_ARM_L2X0_ONLINE,
CPUHP_AP_PERF_POWERPC_NEST_ONLINE,
CPUHP_AP_PERF_POWERPC_COREIMC_ONLINE,
+   CPUHP_AP_PERF_POWERPC_THREADIMC_ONLINE,
CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE,
CPUHP_AP_WORKQUEUE_ONLINE,
CPUHP_AP_RCUTREE_ONLINE,
--
2.7.4


--
Thanks and Regards
gautham.




Re: [PATCH v5 06/13] powerpc/perf: IMC pmu cpumask and cpu hotplug support

2017-03-27 Thread Anju T Sudhakar

Hi Gautham,


Thank you for reviewing the patch.


On Thursday 23 March 2017 05:22 PM, Gautham R Shenoy wrote:

Hi Hemant, Maddy,

On Thu, Mar 16, 2017 at 01:05:00PM +0530, Madhavan Srinivasan wrote:

From: Hemant Kumar 

Adds a cpumask attribute to be used by each IMC pmu. For nest PMUs, only
one cpu (any online CPU) from each chip is designated to read counters.

On CPU hotplug, the dying CPU is checked to see whether it is one of the
designated cpus; if yes, the next online cpu from the same chip (for nest
units) is designated as the new cpu to read counters. For this purpose, we
introduce a new state: CPUHP_AP_PERF_POWERPC_NEST_ONLINE.

Cc: Gautham R. Shenoy 
Cc: Balbir Singh 
Cc: Benjamin Herrenschmidt 
Cc: Paul Mackerras 
Cc: Anton Blanchard 
Cc: Sukadev Bhattiprolu 
Cc: Michael Neuling 
Cc: Stewart Smith 
Cc: Daniel Axtens 
Cc: Stephane Eranian 
Cc: Anju T Sudhakar 
Signed-off-by: Hemant Kumar 
Signed-off-by: Madhavan Srinivasan 
---
  arch/powerpc/include/asm/opal-api.h|   3 +-
  arch/powerpc/include/asm/opal.h|   3 +
  arch/powerpc/perf/imc-pmu.c| 163 -
  arch/powerpc/platforms/powernv/opal-wrappers.S |   1 +
  include/linux/cpuhotplug.h |   1 +
  5 files changed, 169 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index a0aa285869b5..e1c3d4837857 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -168,7 +168,8 @@
  #define OPAL_INT_SET_MFRR 125
  #define OPAL_PCI_TCE_KILL 126
  #define OPAL_NMMU_SET_PTCR127
-#define OPAL_LAST  127
+#define OPAL_NEST_IMC_COUNTERS_CONTROL 145
+#define OPAL_LAST  145

  /* Device tree flags */

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 1ff03a6da76e..d93d08204243 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -227,6 +227,9 @@ int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t 
kill_type,
  uint64_t dma_addr, uint32_t npages);
  int64_t opal_nmmu_set_ptcr(uint64_t chip_id, uint64_t ptcr);

+int64_t opal_nest_imc_counters_control(uint64_t mode, uint64_t value1,
+   uint64_t value2, uint64_t value3);
+
  /* Internal functions */
  extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
   int depth, void *data);
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index f6f1ef9f56af..e46ff6d2a584 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -16,6 +16,7 @@
+static int ppc_nest_imc_cpu_online(unsigned int cpu)
+{

I take it that 'cpu' is coming online.


+   int nid, fcpu, ncpu;
+   struct cpumask *l_cpumask, tmp_mask;
+
+   /* Find the cpumask of this node */
+   nid = cpu_to_node(cpu);
+   l_cpumask = cpumask_of_node(nid);
+
+   /*
+* If any of the cpu from this node is already present in the mask,
+* just return, if not, then set this cpu in the mask.
+*/
+   if (!cpumask_and(&tmp_mask, l_cpumask, &nest_imc_cpumask)) {

In this case, none of the cpus in the node are in the mask. So we set
this cpu in the imc cpumask and return.


+   cpumask_set_cpu(cpu, &nest_imc_cpumask);
+   return 0;
+   }

But this case implies that there is already a CPU from the node which
is in the imc_cpumask. As per the comment above, we are supposed to
just return. So why are we doing the following ?

Either the comment above is incorrect or I am missing something here.


+
+   fcpu = cpumask_first(l_cpumask);
+   ncpu = cpumask_next(cpu, l_cpumask);
+   if (cpu == fcpu) {
+   if (cpumask_test_and_clear_cpu(ncpu, &nest_imc_cpumask)) {
+   cpumask_set_cpu(cpu, &nest_imc_cpumask);
+   nest_change_cpu_context(ncpu, cpu);
+   }
+   }

It seems that we want to set only the smallest online cpu in the node
in the nest_imc_cpumask. So, if the newly onlined cpu is the smallest,
we replace the previous representative with cpu.


Yes, you are right. Here we are designating the smallest online cpu in
the node in the nest_imc_mask. The comment above is only for the 'if'
code block.



So, the comment above needs to be fixed.


Will update the comment to avoid confusion. :-)
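
(Editor's sketch of a comment that matches the code's behaviour as discussed
above; wording illustrative.)

/*
 * If no cpu from this node is in nest_imc_cpumask yet, designate this
 * cpu and return. Otherwise keep the designated reader the smallest
 * online cpu in the node: if the newly onlined cpu is the node's first
 * cpu, move the designation from the current holder to it.
 */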


Thanks,

Anju

+
+   return 0;
+}
+
+static int ppc_nest_imc_cpu_offline(unsigned int cpu)
+{
+   int nid, target = -1;
+   struct cpumask *l_cpumask;
+
+   /*
+* Check in the designated list for this cpu. Dont bother
+* if not one of them.
+*/
+   if (!cpumask_test_and_clear_cpu(cpu, &nest_imc_cpumask))
+   return 0;
+
+   /*
+* Now that this cpu is one of the

Re: [PATCH v9 2/3] PCI: Add a macro to set default alignment for all PCI devices

2017-03-27 Thread Benjamin Herrenschmidt
On Mon, 2017-03-27 at 21:17 +1100, Michael Ellerman wrote:
> > If so, is it acceptable to force that kernel to use 64K alignment
> > even when it's running on non-PowerNV systems?
> 
> Probably, but I'm not sure TBH. Benh will know, I'll try and get his
> attention.

Can we make the alignment PAGE_SIZE? I think that should be OK as long
as it doesn't try to re-allocate BARs for devices that have already
been properly allocated by firmware (i.e. PowerMac; if you mess around
with the MacIO chip on these, bad things will happen).
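
Roughly this (a sketch only; the macro name below is assumed, not taken
from the patch under discussion):

	/*
	 * Default PCI BAR alignment keyed to the kernel page size
	 * instead of a hard-coded 64K. Illustrative sketch only.
	 */
	#define PCIBIOS_DEFAULT_ALIGNMENT	PAGE_SIZE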

Cheers,
Ben.



Re: [PATCH v9 2/3] PCI: Add a macro to set default alignment for all PCI devices

2017-03-27 Thread Michael Ellerman
Bjorn Helgaas  writes:
>
> I don't know how powerpc kernels are built: can you build a kernel
> that will run on PowerNV as well as on different powerpc systems?

Yes you can. But there are some restrictions.

You can only build a 64-bit OR a 32-bit kernel.

And within 64-bit you can only build a *kernel* for the "server
architecture (~= IBM CPUs)" or the "embedded architecture
(Freescale/NXP)".

So in practice you can build a 64-bit kernel that supports:
 - powernv (bare metal on IBM server chips)
 - pseries (virtualised on IBM server chips or qemu)
 - powermac (Apple G5s)
 - pasemi (long dead but still used by some Amiga folks?)
 - Cell/PS3 (also long dead)

> If so, is it acceptable to force that kernel to use 64K alignment even
> when it's running on non-PowerNV systems?

Probably, but I'm not sure TBH. Benh will know, I'll try and get his
attention.

cheers


Re: [PATCH kernel v11 02/10] powerpc/powernv/iommu: Add real mode version of iommu_table_ops::exchange()

2017-03-27 Thread Michael Ellerman
Alexey Kardashevskiy  writes:

> diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c
> index 5f202a566ec5..9bace5df05d5 100644
> --- a/arch/powerpc/kernel/iommu.c
> +++ b/arch/powerpc/kernel/iommu.c
> @@ -1004,6 +1004,29 @@ long iommu_tce_xchg(struct iommu_table *tbl, unsigned long entry,
>  }
>  EXPORT_SYMBOL_GPL(iommu_tce_xchg);
>  
> +long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
> + unsigned long *hpa, enum dma_data_direction *direction)
> +{
> + long ret;
> +
> + ret = tbl->it_ops->exchange_rm(tbl, entry, hpa, direction);
> +
> + if (!ret && ((*direction == DMA_FROM_DEVICE) ||
> + (*direction == DMA_BIDIRECTIONAL))) {
> + struct page *pg = realmode_pfn_to_page(*hpa >> PAGE_SHIFT);

This breaks the corenet64_smp_defconfig build:

arch/powerpc/kernel/iommu.c:1037:21: error: implicit declaration of function 'realmode_pfn_to_page' [-Werror=implicit-function-declaration]


I think you can probably just put it inside #ifdef CONFIG_PPC_BOOK3S_64.
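
Something along these lines (a sketch, with the body elided):

	#ifdef CONFIG_PPC_BOOK3S_64
	long iommu_tce_xchg_rm(struct iommu_table *tbl, unsigned long entry,
			unsigned long *hpa, enum dma_data_direction *direction)
	{
		/* body as in the patch; realmode_pfn_to_page() is only
		 * available on Book3S 64 */
	}
	EXPORT_SYMBOL_GPL(iommu_tce_xchg_rm);
	#endif /* CONFIG_PPC_BOOK3S_64 */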

cheers


[PATCH] kvm: powerpc: book3s: Disable preemption while accessing paca xics_phys field

2017-03-27 Thread Denis Kirjanov
With CONFIG_DEBUG_PREEMPT enabled we see the following trace:

[  129.314426] BUG: using smp_processor_id() in preemptible [] code: modprobe/5459
[  129.314580] caller is .kvmppc_book3s_init_hv+0x184/0x350 [kvm_hv]
[  129.314697] CPU: 11 PID: 5459 Comm: modprobe Not tainted 4.11.0-rc3-00022-gc7e790c #1
[  129.314848] Call Trace:
[  129.314915] [c007d624b810] [c23eef10] .dump_stack+0xe4/0x150 (unreliable)
[  129.315065] [c007d624b8a0] [c1f6ec04] .check_preemption_disabled+0x134/0x150
[  129.315210] [c007d624b940] [da010274] .kvmppc_book3s_init_hv+0x184/0x350 [kvm_hv]
[  129.315339] [c007d624ba00] [c191d5cc] .do_one_initcall+0x5c/0x1c0
[  129.315416] [c007d624bad0] [c23e9494] .do_init_module+0x84/0x240
[  129.315492] [c007d624bb70] [c1aade18] .load_module+0x1f68/0x2a10
[  129.315568] [c007d624bd20] [c1aaeb30] .SyS_finit_module+0xc0/0xf0
[  129.315645] [c007d624be30] [c191baec] system_call+0x38/0xfc

Disable preemption while accessing the paca struct to avoid an inconsistent state.

Fixes: f725758 ("KVM: PPC: Book3S HV: Use OPAL XICS emulation on POWER9")
Signed-off-by: Denis Kirjanov 
---
 arch/powerpc/kvm/book3s_hv.c | 10 +-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 1ec86d9..8e96bab 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3913,6 +3913,10 @@ static int kvmppc_radix_possible(void)
 static int kvmppc_book3s_init_hv(void)
 {
int r;
+#ifdef CONFIG_SMP
+   unsigned long xics_phys;
+#endif
+
/*
 * FIXME!! Do we need to check on all cpus ?
 */
@@ -3930,7 +3934,11 @@ static int kvmppc_book3s_init_hv(void)
 * indirectly, via OPAL.
 */
 #ifdef CONFIG_SMP
-   if (!get_paca()->kvm_hstate.xics_phys) {
+   preempt_disable();
+   xics_phys = get_paca()->kvm_hstate.xics_phys;
+   preempt_enable();
+
+   if (!xics_phys) {
struct device_node *np;
 
np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc");
-- 
1.8.3.1



[PATCH kernel v2] powerpc/powernv: Check kzalloc() return value in pnv_pci_table_alloc

2017-03-27 Thread Alexey Kardashevskiy
pnv_pci_table_alloc() ignores a possible failure from kzalloc_node();
this adds a check. There are two callers of pnv_pci_table_alloc():
one already checks for tbl != NULL, so this adds a WARN_ON() to the other
path, which only runs during boot time on IODA1 and is not expected to fail.

Signed-off-by: Alexey Kardashevskiy 
---
Changes:
v2:
* s/BUG_ON/WARN_ON/
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 3 +++
 arch/powerpc/platforms/powernv/pci.c  | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index e36738291c32..04ef03a5201b 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2128,6 +2128,9 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
 
 found:
tbl = pnv_pci_table_alloc(phb->hose->node);
+   if (WARN_ON(!tbl))
+   return;
+
iommu_register_group(&pe->table_group, phb->hose->global_number,
pe->pe_number);
pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group);
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index eb835e977e33..9acdf6889c0d 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -766,6 +766,9 @@ struct iommu_table *pnv_pci_table_alloc(int nid)
struct iommu_table *tbl;
 
tbl = kzalloc_node(sizeof(struct iommu_table), GFP_KERNEL, nid);
+   if (!tbl)
+   return NULL;
+
INIT_LIST_HEAD_RCU(&tbl->it_group_list);
 
return tbl;
-- 
2.11.0



Re: [PATCH kernel] powerpc/powernv: Check kzalloc() return in pnv_pci_table_alloc

2017-03-27 Thread Michael Ellerman
Alexey Kardashevskiy  writes:

> pnv_pci_table_alloc() ignores possible failure from kzalloc_node(),
> this adds a check. There are 2 callers of pnv_pci_table_alloc(),
> one already checks for tbl!=NULL, this adds BUG_ON() to the other path
> which only happens during boot time in IODA1 and not expected to fail.
>
> Fixes: 0eaf4defc7c4 ("powerpc/spapr: vfio: Switch from iommu_table to new iommu_table_group")
> Signed-off-by: Alexey Kardashevskiy 
> ---
>  arch/powerpc/platforms/powernv/pci-ioda.c | 1 +
>  arch/powerpc/platforms/powernv/pci.c  | 3 +++
>  2 files changed, 4 insertions(+)
>
> diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
> index e36738291c32..dcda3f6b7d36 100644
> --- a/arch/powerpc/platforms/powernv/pci-ioda.c
> +++ b/arch/powerpc/platforms/powernv/pci-ioda.c
> @@ -2128,6 +2128,7 @@ static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
>  
>  found:
>   tbl = pnv_pci_table_alloc(phb->hose->node);
> + BUG_ON(!tbl);
>   iommu_register_group(&pe->table_group, phb->hose->global_number,
>   pe->pe_number);
>   pnv_pci_link_table_and_group(phb->hose->node, 0, tbl, &pe->table_group);

Please don't add BUG_ON()'s for non-critical things like this.

I know memory allocation isn't expected to fail during boot, but don't
make people's lives harder by BUG'ing.

Ideally you'd just return failure to the caller and eventually we'd
print a message at some higher level. In a case like this where the
function returns void, rather than restructuring everything to pass
a return value you can just do:

if (WARN_ON(!tbl))
return;

cheers


[PATCH net-next] net: ibmvnic: Remove unused net_stats member from struct ibmvnic_adapter

2017-03-27 Thread Tobias Klauser
The ibmvnic driver keeps its statistics in net_device->stats, so the
net_stats member in struct ibmvnic_adapter is unused. Remove it.

Signed-off-by: Tobias Klauser 
---
 drivers/net/ethernet/ibm/ibmvnic.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmvnic.h b/drivers/net/ethernet/ibm/ibmvnic.h
index 10ad259208cb..42ad648c174d 100644
--- a/drivers/net/ethernet/ibm/ibmvnic.h
+++ b/drivers/net/ethernet/ibm/ibmvnic.h
@@ -953,7 +953,6 @@ struct ibmvnic_adapter {
dma_addr_t bounce_buffer_dma;
 
/* Statistics */
-   struct net_device_stats net_stats;
struct ibmvnic_statistics stats;
dma_addr_t stats_token;
struct completion stats_done;
-- 
2.11.0




[PATCH net-next] net: ibmveth: Remove unused stats member from struct ibmveth_adapter

2017-03-27 Thread Tobias Klauser
The ibmveth driver keeps its statistics in net_device->stats, so the
stats member in struct ibmveth_adapter is unused. Remove it.

Signed-off-by: Tobias Klauser 
---
 drivers/net/ethernet/ibm/ibmveth.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/ibm/ibmveth.h b/drivers/net/ethernet/ibm/ibmveth.h
index 7acda04d034e..ed8780cca982 100644
--- a/drivers/net/ethernet/ibm/ibmveth.h
+++ b/drivers/net/ethernet/ibm/ibmveth.h
@@ -146,7 +146,6 @@ struct ibmveth_adapter {
 struct vio_dev *vdev;
 struct net_device *netdev;
 struct napi_struct napi;
-struct net_device_stats stats;
 unsigned int mcastFilterSize;
 void * buffer_list_addr;
 void * filter_list_addr;
-- 
2.11.0