Re: [RFC PATCH v1 7/7] powerpc/bpf: Implement extended BPF on PPC32

2020-12-16 Thread Alexei Starovoitov
On Wed, Dec 16, 2020 at 10:07:37AM +, Christophe Leroy wrote:
> Implement Extended Berkeley Packet Filter on Powerpc 32
> 
> Test result with test_bpf module:
> 
>   test_bpf: Summary: 378 PASSED, 0 FAILED, [354/366 JIT'ed]

nice!

> Registers mapping:
> 
>   [BPF_REG_0] = r11-r12
>   /* function arguments */
>   [BPF_REG_1] = r3-r4
>   [BPF_REG_2] = r5-r6
>   [BPF_REG_3] = r7-r8
>   [BPF_REG_4] = r9-r10
>   [BPF_REG_5] = r21-r22 (Args 9 and 10 come in via the stack)
>   /* non volatile registers */
>   [BPF_REG_6] = r23-r24
>   [BPF_REG_7] = r25-r26
>   [BPF_REG_8] = r27-r28
>   [BPF_REG_9] = r29-r30
>   /* frame pointer aka BPF_REG_10 */
>   [BPF_REG_FP] = r31
>   /* eBPF jit internal registers */
>   [BPF_REG_AX] = r19-r20
>   [TMP_REG] = r18
> 
> As PPC32 doesn't have a redzone in the stack,
> use r17 as tail call counter.
> 
> r0 is used as temporary register as much as possible. It is referenced
> directly in the code in order to avoid misuse of it, because some
> instructions interpret it as value 0 instead of register r0
> (ex: addi, addis, stw, lwz, ...)
> 
> The following operations are not implemented:
> 
>   case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
>   case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
>   case BPF_STX | BPF_XADD | BPF_DW: /* *(u64 *)(dst + off) += src */
> 
> The following operations are only implemented for power of two constants:
> 
>   case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */
>   case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */

Those are sensible limitations. MOD and DIV are rare, but XADD is common.
Please consider doing it as a cmpxchg loop in the future.
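
For reference, the general shape of the cmpxchg approach for the word-sized
case (a sketch only -- the JIT would emit the equivalent lwarx/stwcx. sequence
inline rather than call a C helper, and the BPF_DW case is the awkward one on
32-bit hardware):

static void xadd32_cmpxchg(u32 *addr, u32 val)
{
	u32 old;

	do {
		old = READ_ONCE(*addr);		/* snapshot the current value */
	} while (cmpxchg(addr, old, old + val) != old);	/* retry if we raced */
}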

Also please run test_progs. It will give a lot better coverage than test_bpf.ko


Re: [PATCH v2 1/2] KVM: PPC: Book3S HV: Add support for H_RPT_INVALIDATE

2020-12-16 Thread David Gibson
On Wed, Dec 16, 2020 at 02:24:46PM +0530, Bharata B Rao wrote:
> Implement H_RPT_INVALIDATE hcall and add KVM capability
> KVM_CAP_PPC_RPT_INVALIDATE to indicate the support for the same.
> 
> This hcall does two types of TLB invalidations:
> 
> 1. Process-scoped invalidations for guests with LPCR[GTSE]=0.
>This is currently not used in KVM as GTSE is not usually
>disabled in KVM.
> 2. Partition-scoped invalidations that an L1 hypervisor does on
>behalf of an L2 guest. This replaces the uses of the existing
>hcall H_TLB_INVALIDATE.
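
A hypothetical userspace check for this capability (illustration only, not
part of the patch):

#include <sys/ioctl.h>
#include <linux/kvm.h>

/* returns non-zero if the kernel handles H_RPT_INVALIDATE for its guests */
static int have_rpt_invalidate(int kvm_fd)
{
	return ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_RPT_INVALIDATE) > 0;
}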
> 
> Signed-off-by: Bharata B Rao 
> ---
>  Documentation/virt/kvm/api.rst|  17 +++
>  .../include/asm/book3s/64/tlbflush-radix.h|  18 +++
>  arch/powerpc/include/asm/kvm_book3s.h |   3 +
>  arch/powerpc/kvm/book3s_hv.c  | 121 ++
>  arch/powerpc/kvm/book3s_hv_nested.c   |  94 ++
>  arch/powerpc/kvm/powerpc.c|   3 +
>  arch/powerpc/mm/book3s64/radix_tlb.c  |   4 -
>  include/uapi/linux/kvm.h  |   1 +
>  8 files changed, 257 insertions(+), 4 deletions(-)
> 
> diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
> index e00a66d72372..5ce237c0d707 100644
> --- a/Documentation/virt/kvm/api.rst
> +++ b/Documentation/virt/kvm/api.rst
> @@ -6014,6 +6014,23 @@ KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit 
> notifications which user space
>  can then handle to implement model specific MSR handling and/or user 
> notifications
>  to inform a user that an MSR was not handled.
>  
> +7.22 KVM_CAP_PPC_RPT_INVALIDATE
> +--
> +
> +:Capability: KVM_CAP_PPC_RPT_INVALIDATE
> +:Architectures: ppc
> +:Type: vm
> +
> +This capability indicates that the kernel is capable of handling
> +H_RPT_INVALIDATE hcall.
> +
> +In order to enable the use of H_RPT_INVALIDATE in the guest,
> +user space might have to advertise it for the guest. For example,
> +IBM pSeries (sPAPR) guest starts using it if "hcall-rpt-invalidate" is
> +present in the "ibm,hypertas-functions" device-tree property.
> +
> +This capability is always enabled.
> +
>  8. Other capabilities.
>  ==
>  
> diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h 
> b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
> index 94439e0cefc9..aace7e9b2397 100644
> --- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
> @@ -4,6 +4,10 @@
>  
>  #include 
>  
> +#define RIC_FLUSH_TLB 0
> +#define RIC_FLUSH_PWC 1
> +#define RIC_FLUSH_ALL 2
> +
>  struct vm_area_struct;
>  struct mm_struct;
>  struct mmu_gather;
> @@ -21,6 +25,20 @@ static inline u64 psize_to_rpti_pgsize(unsigned long psize)
>   return H_RPTI_PAGE_ALL;
>  }
>  
> +static inline int rpti_pgsize_to_psize(unsigned long page_size)
> +{
> + if (page_size == H_RPTI_PAGE_4K)
> + return MMU_PAGE_4K;
> + if (page_size == H_RPTI_PAGE_64K)
> + return MMU_PAGE_64K;
> + if (page_size == H_RPTI_PAGE_2M)
> + return MMU_PAGE_2M;
> + if (page_size == H_RPTI_PAGE_1G)
> + return MMU_PAGE_1G;
> + else
> + return MMU_PAGE_64K; /* Default */
> +}
> +
>  static inline int mmu_get_ap(int psize)
>  {
>   return mmu_psize_defs[psize].ap;
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
> b/arch/powerpc/include/asm/kvm_book3s.h
> index d32ec9ae73bd..0f1c5fa6e8ce 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -298,6 +298,9 @@ void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 
> dw1);
>  void kvmhv_release_all_nested(struct kvm *kvm);
>  long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
>  long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
> +long kvmhv_h_rpti_nested(struct kvm_vcpu *vcpu, unsigned long lpid,
> +  unsigned long type, unsigned long pg_sizes,
> +  unsigned long start, unsigned long end);
>  int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu,
> u64 time_limit, unsigned long lpcr);
>  void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
> diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
> index e3b1839fc251..adf2d1191581 100644
> --- a/arch/powerpc/kvm/book3s_hv.c
> +++ b/arch/powerpc/kvm/book3s_hv.c
> @@ -904,6 +904,118 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
>   return yield_count;
>  }
>  
> +static inline void do_tlb_invalidate_all(unsigned long rb, unsigned long rs)
> +{
> + asm volatile("ptesync" : : : "memory");
> + asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
> + : : "r"(rb), "i"(1), "i"(1), "i"(RIC_FLUSH_ALL), "r"(rs)
> + : "memory");
> + asm volatile("eieio; tlbsync; ptesync" : : : "memory");
> +}
> +
> +static inline void do_tlb_invalidate_pwc(unsigned long rb, unsigned long rs)
> +{

Re: [PATCH v2 0/2] Support for H_RPT_INVALIDATE in PowerPC KVM

2020-12-16 Thread David Gibson
On Wed, Dec 16, 2020 at 02:24:45PM +0530, Bharata B Rao wrote:
> This patchset adds support for the new hcall H_RPT_INVALIDATE
> and replaces the nested tlb flush calls with this new hcall
> if support for the same exists.
> 
> Changes in v2:
> -
> - Not enabling the hcall by default now, userspace can enable it when
>   required.
> - Added implementation for process-scoped invalidations in the hcall.
> 
> v1: 
> https://lore.kernel.org/linuxppc-dev/20201019112642.53016-1-bhar...@linux.ibm.com/T/#t
> 
> H_RPT_INVALIDATE
> 
> Syntax:
> int64   /* H_Success: Return code on successful completion */
>     /* H_Busy - repeat the call with the same */
>     /* H_Parameter, H_P2, H_P3, H_P4, H_P5 : Invalid parameters */
>     hcall(const uint64 H_RPT_INVALIDATE, /* Invalidate RPT translation 
> lookaside information */
>   uint64 pid,   /* PID/LPID to invalidate */
>   uint64 target,    /* Invalidation target */
>   uint64 type,  /* Type of lookaside information */
>   uint64 pageSizes, /* Page sizes */
>   uint64 start, /* Start of Effective Address (EA) range 
> (inclusive) */
>   uint64 end)   /* End of EA range (exclusive) */
> 
> Invalidation targets (target)
> -
> Core MMU    0x01 /* All virtual processors in the partition */
> Core local MMU  0x02 /* Current virtual processor */
> Nest MMU    0x04 /* All nest/accelerator agents in use by the partition */
> 
> A combination of the above can be specified, except core and core local.
> 
> Type of translation to invalidate (type)
> ---
> NESTED   0x0001  /* Invalidate nested guest partition-scope */
> TLB  0x0002  /* Invalidate TLB */
> PWC  0x0004  /* Invalidate Page Walk Cache */
> PRT  0x0008  /* Invalidate Process Table Entries if NESTED is clear */
> PAT  0x0008  /* Invalidate Partition Table Entries if NESTED is set */
> 
> A combination of the above can be specified.
> 
> Page size mask (pageSizes)
> --
> 4K  0x01
> 64K 0x02
> 2M  0x04
> 1G  0x08
> All sizes   (-1UL)

PAPR has a real talent for tying its own shoelaces together.
They could have just made the bit for each pagesize be... the size of
the page, but why use something obviously extensible to any future
pagesizes when we can make it both less flexible and more complicated
to deal with?  Sigh.
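
To illustrate: with the PAPR encoding, every new page size needs a new
H_RPTI_PAGE_* bit plus a conversion table, whereas "the bit is the page size"
would need no table at all (hypothetical helpers, not from the patch):

static u64 pshift_to_rpti(unsigned int shift)
{
	switch (shift) {
	case 12: return H_RPTI_PAGE_4K;
	case 16: return H_RPTI_PAGE_64K;
	case 21: return H_RPTI_PAGE_2M;
	case 30: return H_RPTI_PAGE_1G;
	default: return H_RPTI_PAGE_ALL;	/* punt on anything else */
	}
}

static u64 pshift_to_mask(unsigned int shift)
{
	return 1ULL << shift;			/* 4K -> 0x1000, 64K -> 0x10000, ... */
}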

> 
> A combination of the above can be specified.
> All page sizes can be selected with -1.
> 
> Semantics: Invalidate radix tree lookaside information
>    matching the parameters given.
> * Return H_P2, H_P3 or H_P4 if target, type, or pageSizes parameters are
>   different from the defined values.
> * Return H_PARAMETER if NESTED is set and pid is not a valid nested
>   LPID allocated to this partition
> * Return H_P5 if (start, end) doesn't form a valid range. Start and end
>   should be valid quadrant addresses and end > start.
> * Return H_NotSupported if the partition is not running in radix
>   translation mode.
> * May invalidate more translation information than requested.
> * If start = 0 and end = -1, set the range to cover all valid addresses.
>   Else start and end should be aligned to 4kB (lower 11 bits clear).
> * If NESTED is clear, then invalidate process scoped lookaside information.
>   Else pid specifies a nested LPID, and the invalidation is performed
>   on nested guest partition table and nested guest partition scope real
>   addresses.
> * If pid = 0 and NESTED is clear, then valid addresses are quadrant 3 and
>   quadrant 0 spaces, else valid addresses are quadrant 0.
> * Pages which are fully covered by the range are to be invalidated.
>   Those which are partially covered are considered outside invalidation
>   range, which allows a caller to optimally invalidate ranges that may
>   contain mixed page sizes.
> * Return H_SUCCESS on success.
> 
> Bharata B Rao (2):
>   KVM: PPC: Book3S HV: Add support for H_RPT_INVALIDATE
>   KVM: PPC: Book3S HV: Use H_RPT_INVALIDATE in nested KVM
> 
>  Documentation/virt/kvm/api.rst|  17 +++
>  .../include/asm/book3s/64/tlbflush-radix.h|  18 +++
>  arch/powerpc/include/asm/kvm_book3s.h |   3 +
>  arch/powerpc/kvm/book3s_64_mmu_radix.c|  27 +++-
>  arch/powerpc/kvm/book3s_hv.c  | 121 ++
>  arch/powerpc/kvm/book3s_hv_nested.c   | 106 ++-
>  arch/powerpc/kvm/powerpc.c|   3 +
>  arch/powerpc/mm/book3s64/radix_tlb.c  |   4 -
>  include/uapi/linux/kvm.h  |   1 +
>  9 files changed, 289 insertions(+), 11 deletions(-)
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
 

powerpc VDSO files being unnecessarily rebuilt

2020-12-16 Thread Michael Ellerman
Hi all,

Since the merge of the C VDSO I see we are repeatedly rebuilding some
files in the VDSO, eg:

  $ make V=2
  make[1]: Entering directory '/home/michael/linux/build~'
GEN Makefile
CALL    /home/michael/linux/scripts/checksyscalls.sh - due to target missing
CALL    /home/michael/linux/scripts/atomic/check-atomics.sh - due to target missing
CHK include/generated/compile.h
CC  arch/powerpc/kernel/vdso64/vgettimeofday.o - due to vgettimeofday.o 
not in $(targets)

This then causes multiple other files to be rebuilt.

So the obvious fix is to add it to targets:

diff --git a/arch/powerpc/kernel/vdso64/Makefile 
b/arch/powerpc/kernel/vdso64/Makefile
index d365810a689a..5386532866ce 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -5,6 +5,7 @@ ARCH_REL_TYPE_ABS := 
R_PPC_JUMP_SLOT|R_PPC_GLOB_DAT|R_PPC_ADDR32|R_PPC_ADDR24|R_
 include $(srctree)/lib/vdso/Makefile
 
 obj-vdso64 = sigtramp.o gettimeofday.o datapage.o cacheflush.o note.o getcpu.o
+targets := $(obj-vdso64) vdso64.so.dbg
 
 ifneq ($(c-gettimeofday-y),)
   CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
@@ -13,11 +14,11 @@ ifneq ($(c-gettimeofday-y),)
   CFLAGS_vgettimeofday.o += -DDISABLE_BRANCH_PROFILING
   CFLAGS_vgettimeofday.o += -ffreestanding -fasynchronous-unwind-tables
   CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE)
+  targets += vgettimeofday.o
 endif
 
 # Build rules
 
-targets := $(obj-vdso64) vdso64.so.dbg
 obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64))
 
 GCOV_PROFILE := n


But then I see it still rebuilt:

  CC  arch/powerpc/kernel/vdso64/vgettimeofday.o - due to command line 
change


I'm not changing the command line, and AFAICS the .cmd file is not
changing either:

  $ make V=2
  ...
CC  arch/powerpc/kernel/vdso64/vgettimeofday.o - due to command line 
change
  
  $ sha256sum build\~/arch/powerpc/kernel/vdso64/vgettimeofday.o
  7f635546bc2768c7b929d3de1724d83285f3cd54394fcd7104f8b1301d689d65  
build~/arch/powerpc/kernel/vdso64/vgettimeofday.o
  
  $ make V=2
  ...
CC  arch/powerpc/kernel/vdso64/vgettimeofday.o - due to command line 
change
  
  $ sha256sum build\~/arch/powerpc/kernel/vdso64/vgettimeofday.o
  7f635546bc2768c7b929d3de1724d83285f3cd54394fcd7104f8b1301d689d65  
build~/arch/powerpc/kernel/vdso64/vgettimeofday.o
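
(For reference, the command kbuild is comparing against is cached in a hidden
.cmd file next to the object. Assuming the standard kbuild dot-file naming,
something like:

  $ head -1 build~/arch/powerpc/kernel/vdso64/.vgettimeofday.o.cmd

should show a single 'cmd_<object> := <full command line>' assignment, which
can then be diffed against the command a V=1 build prints for that object.)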


So any hints on what I'm missing here?

cheers


[PATCH] powerpc/64s/kuap: Use mmu_has_feature()

2020-12-16 Thread Michael Ellerman
In commit 8150a153c013 ("powerpc/64s: Use early_mmu_has_feature() in
set_kuap()") we switched the KUAP code to use early_mmu_has_feature(),
to avoid a bug where we called set_kuap() before feature patching had
been done, leading to recursion and crashes.
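
For context, this is roughly how the two helpers differ (simplified sketch of
what lives in arch/powerpc/include/asm/mmu.h, not the exact code):

/* Always reads the feature word, so it is safe before feature patching. */
static inline bool early_mmu_has_feature(unsigned long feature)
{
	return !!(cur_cpu_spec->mmu_features & feature);
}

/* Backed by a static key, so it is only correct once patching has run. */
static inline bool mmu_has_feature(unsigned long feature)
{
	return static_branch_likely(&mmu_feature_keys[__builtin_ctzl(feature)]);
}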

That path, which called probe_kernel_read() from printk(), has since
been removed, see commit 2ac5a3bf7042 ("vsprintf: Do not break early
boot with probing addresses").

Additionally probe_kernel_read() no longer invokes any KUAP routines,
since commit fe557319aa06 ("maccess: rename probe_kernel_{read,write}
to copy_{from,to}_kernel_nofault") and c33165253492 ("powerpc: use
non-set_fs based maccess routines").

So it should now be safe to use mmu_has_feature() in the KUAP
routines, because we shouldn't invoke them prior to feature patching.

This is essentially a revert of commit 8150a153c013 ("powerpc/64s: Use
early_mmu_has_feature() in set_kuap()"), but we've since added a
second usage of early_mmu_has_feature() in get_kuap(), so we convert
that to use mmu_has_feature() as well.

Depends-on: c33165253492 ("powerpc: use non-set_fs based maccess routines").
Reported-by: Christophe Leroy 
Signed-off-by: Michael Ellerman 
---
 arch/powerpc/include/asm/book3s/64/kup.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/kup.h 
b/arch/powerpc/include/asm/book3s/64/kup.h
index f50f72e535aa..2298eac49763 100644
--- a/arch/powerpc/include/asm/book3s/64/kup.h
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -333,7 +333,7 @@ static inline unsigned long get_kuap(void)
 * This has no effect in terms of actually blocking things on hash,
 * so it doesn't break anything.
 */
-   if (!early_mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
+   if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
return AMR_KUAP_BLOCKED;
 
return mfspr(SPRN_AMR);
@@ -341,7 +341,7 @@ static inline unsigned long get_kuap(void)
 
 static inline void set_kuap(unsigned long value)
 {
-   if (!early_mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
+   if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
return;
 
/*
-- 
2.25.1



Re: linux-next: manual merge of the akpm-current tree with the powerpc tree

2020-12-16 Thread Stephen Rothwell
Hi all,

On Tue, 8 Dec 2020 20:40:16 +1100 Stephen Rothwell  
wrote:
>
> Today's linux-next merge of the akpm-current tree got conflicts in:
> 
>   drivers/misc/lkdtm/Makefile
>   drivers/misc/lkdtm/lkdtm.h
>   tools/testing/selftests/lkdtm/tests.txt
> 
> between commit:
> 
>   3ba150fb2120 ("lkdtm/powerpc: Add SLB multihit test")
> 
> from the powerpc tree and commit:
> 
>   014a486edd8a ("drivers/misc/lkdtm: add new file in LKDTM to test fortified 
> strscpy")
> 
> from the akpm-current tree.
> 
> I fixed it up (see below) and can carry the fix as necessary. This
> is now fixed as far as linux-next is concerned, but any non trivial
> conflicts should be mentioned to your upstream maintainer when your tree
> is submitted for merging.  You may also want to consider cooperating
> with the maintainer of the conflicting tree to minimise any particularly
> complex conflicts.
> 
> 
> diff --cc drivers/misc/lkdtm/Makefile
> index 5a92c74eca92,d898f7b22045..
> --- a/drivers/misc/lkdtm/Makefile
> +++ b/drivers/misc/lkdtm/Makefile
> @@@ -10,7 -10,7 +10,8 @@@ lkdtm-$(CONFIG_LKDTM)   += rodata_objcop
>   lkdtm-$(CONFIG_LKDTM)   += usercopy.o
>   lkdtm-$(CONFIG_LKDTM)   += stackleak.o
>   lkdtm-$(CONFIG_LKDTM)   += cfi.o
> + lkdtm-$(CONFIG_LKDTM)   += fortify.o
>  +lkdtm-$(CONFIG_PPC_BOOK3S_64)   += powerpc.o
>   
>   KASAN_SANITIZE_stackleak.o  := n
>   KCOV_INSTRUMENT_rodata.o:= n
> diff --cc drivers/misc/lkdtm/lkdtm.h
> index 79ec05c18dd1,6aa6d6a1a839..
> --- a/drivers/misc/lkdtm/lkdtm.h
> +++ b/drivers/misc/lkdtm/lkdtm.h
> @@@ -102,7 -104,7 +104,10 @@@ void lkdtm_STACKLEAK_ERASING(void)
>   /* cfi.c */
>   void lkdtm_CFI_FORWARD_PROTO(void);
>   
> + /* fortify.c */
> + void lkdtm_FORTIFIED_STRSCPY(void);
> + 
>  +/* powerpc.c */
>  +void lkdtm_PPC_SLB_MULTIHIT(void);
>  +
>   #endif
> diff --cc tools/testing/selftests/lkdtm/tests.txt
> index 18e4599863c0,92ba4cc41314..
> --- a/tools/testing/selftests/lkdtm/tests.txt
> +++ b/tools/testing/selftests/lkdtm/tests.txt
> @@@ -68,4 -68,4 +68,5 @@@ USERCOPY_STACK_BEYON
>   USERCOPY_KERNEL
>   STACKLEAK_ERASING OK: the rest of the thread stack is properly erased
>   CFI_FORWARD_PROTO
> + FORTIFIED_STRSCPY
>  +PPC_SLB_MULTIHIT Recovered

These conflicts are now between the powerpc tree and Linus' tree.

-- 
Cheers,
Stephen Rothwell




Re: [PATCH] arch: fix 'unexpected IRQ trap at vector' warnings

2020-12-16 Thread Enrico Weigelt, metux IT consult
On 15.12.20 23:12, Thomas Gleixner wrote:
> On Tue, Dec 15 2020 at 21:12, Enrico Weigelt wrote:
>> On 09.12.20 00:01, Thomas Gleixner wrote:
>>>   3) It's invoked from __handle_domain_irq() when the 'hwirq' which is
>>>  handed in by the caller does not resolve to a mapped Linux
>>>  interrupt which is pretty much the same as the x86 situation above
>>>  in #1, but it prints useless data.
>>>
>>>  It prints 'irq' which is invalid but it does not print the really
>>>  interesting 'hwirq' which was handed in by the caller and did
>>>  not resolve.
>>
>> I wouldn't say the irq-nr isn't interesting. In my particular case it
>> was quite what I've been looking for. But you're right, hwirq should
>> also be printed.
> 
> The number is _not_ interesting in this case. It's useless because the
> function does:

Oh, I've mixed up the cases - I only had the other one, down below.

> irq = hwirq;
> 
> if (lookup)
> irq = find_mapping(hwirq);
> 
> if (!irq || irq >= nr_irqs)
>-> BAD

When exactly can that happen ? Only when some hardware sends an IRQ
but no driver is listening to it, or are there other cases ?

By the way: who's supposed to call that function ? Only irqchips
(and the few SoC-specific 1st-level irq handlers) ? I'm asking because
we have lots of gpio drivers, which have their own irq domain, but go
the generic_handle_irq() route. Same for some SOC-specific irqchips.

Should they also call handle_domain_irq() instead ?
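
The pattern I have in mind looks roughly like this (simplified, hypothetical
driver, just for illustration):

static irqreturn_t foo_gpio_demux(int parent_irq, void *data)
{
	struct foo_gpio *chip = data;
	unsigned long pending = readl(chip->base + FOO_IRQ_STATUS);
	int bit;

	for_each_set_bit(bit, &pending, chip->ngpio)
		/* translate the local hwirq (gpio line) to a Linux irq and dispatch */
		generic_handle_irq(irq_find_mapping(chip->domain, bit));

	return IRQ_HANDLED;
}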

> In both cases the only interesting information is that hwirq does not
> resolve to a valid Linux interrupt number and which hwirq number caused
> that.

Don't we also need to know which irqchip the hwirq number belongs to ?

> If you look really then you find out that there is exactly _ONE_
> architecture which does anything else than incrementing a counter and/or
> printing stuff: X86, which has a big fat comment explaining why. The
> only way to ack an interrupt on X86 is to issue EOI on the local APIC,
> i.e. it does _not_ need any further information.

Yeah, found it :)

At this point I wonder whether the ack_APIC_irq() call could be done
somewhere further up in the call chain, eg. handle_irq() or
common_interrupt() ?

If that works, we IMHO could drop ack_bad_irq() completely (except for
the counter and printk, which we could consolidate elsewhere anyways)

>> ... rethinking this further ... shouldn't we also pass in even more data
>> (eg. irq_desc, irqchip, ...), so this function can check which hw to
>> actually talk to ?
> 
> There are 3 ways to get there:
> 
>   1) via dummy chip which obviously has no hardware associated

... which also calls print_irq_desc() ..

>   2) via handle_bad_irq() which prints the info already

print_irq_desc() doesn't seem to print the hwirq ... shall we fix this ?

>   3) __handle_domain_irq() which cannot print anything and obviously
>  cannot figure out the hw to talk to because there is no irq
>  descriptor associated.

Okay, what's the conclusion ? Drop printouts in the ack_bad_irq()'s ?

>>>   4) It's invoked from the dummy irq chip which is installed for a
>>>  couple of truly virtual interrupts where the invocation of
>>>  dummy_irq_chip::irq_ack() is indicating wreckage.
>>>
>>>  In that case the Linux irq number is the thing which is printed.
>>>
>>> So no. It's not just inconsistent it's in some places outright
>>> wrong. What we really want is:
>>>
>>> ack_bad_irq(int hwirq, int virq)
>>
>> is 'int' correct here ?
> 
> This was just for illustration.

Okay, thanks. Just discovered we already have an irq_hw_number_t, which
doesn't seem to be used everywhere ... shall we fix that ?

>> OTOH: since both callers (dummychip.c, handle.c) already dump out before
>> ack_bad_irq(), do we need to print out anything at all ?
> 
> Not all callers print something, but yes this could do with some general
> cleanup.

I've found three callers; only one (__handle_domain_irq() in irqdesc.c)
doesn't print out anything. I believe adding a pr_warn() here and dropping
all the printouts in the ack_bad_irq()s makes sense.

> The error counter is independent of that, but yes there is room for
> consolidation.

Ok, I've already started hacking a bit here: adding an atomic_t counter
in kernel/irq/handle.c and inline'd accessor functions in
include/asm-generic/irq.h (just feeling that accessors are a bit cleaner
than direct access). Would that be okay ?
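
Roughly along these lines (just a sketch of what I have in mind, names are
made up, not a finished patch):

/* kernel/irq/handle.c */
static atomic_t irq_bad_count = ATOMIC_INIT(0);

void irq_inc_bad_count(void)
{
	atomic_inc(&irq_bad_count);
}

unsigned int irq_get_bad_count(void)
{
	return atomic_read(&irq_bad_count);
}

/* include/asm-generic/irq.h would then only declare the two accessors. */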

By the way: I still wonder whether my case should have ever reached
ack_bad_irq().

The irqdescs had been allocated via devm_irq_alloc_descs(), and the
driver just called generic_handle_irq() with base irq + gpio nr.
So, IMHO it was a valid linux irq number, but no (explicit) handler.

I wonder whether ack'ing those virtual irqs onto hw could be harmful.


--mtx

-- 
---
Note: unencrypted e-mails can easily be intercepted and manipulated!
For confidential communication, please send your GPG/PGP key.
---

[PATCH -next] pci/controller/dwc: convert comma to semicolon

2020-12-16 Thread Zheng Yongjun
Replace a comma between expression statements by a semicolon.

Signed-off-by: Zheng Yongjun 
---
 drivers/pci/controller/dwc/pci-layerscape-ep.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/pci/controller/dwc/pci-layerscape-ep.c 
b/drivers/pci/controller/dwc/pci-layerscape-ep.c
index 84206f265e54..917ba8d254fc 100644
--- a/drivers/pci/controller/dwc/pci-layerscape-ep.c
+++ b/drivers/pci/controller/dwc/pci-layerscape-ep.c
@@ -178,7 +178,7 @@ static int __init ls_pcie_ep_probe(struct platform_device 
*pdev)
pci->dev = dev;
pci->ops = pcie->drvdata->dw_pcie_ops;
 
-   ls_epc->bar_fixed_64bit = (1 << BAR_2) | (1 << BAR_4),
+   ls_epc->bar_fixed_64bit = (1 << BAR_2) | (1 << BAR_4);
 
pcie->pci = pci;
pcie->ls_epc = ls_epc;
-- 
2.22.0



[PATCH -next] soc: fsl: qbman: Delete useless kfree code

2020-12-16 Thread Zheng Yongjun
The parameter passed to kfree() here is always NULL, so the call is useless; delete it.

Signed-off-by: Zheng Yongjun 
---
 drivers/soc/fsl/qbman/bman.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/soc/fsl/qbman/bman.c b/drivers/soc/fsl/qbman/bman.c
index c5dd026fe889..6cc1847e534a 100644
--- a/drivers/soc/fsl/qbman/bman.c
+++ b/drivers/soc/fsl/qbman/bman.c
@@ -709,7 +709,6 @@ struct bman_pool *bman_new_pool(void)
return pool;
 err:
bm_release_bpid(bpid);
-   kfree(pool);
return NULL;
 }
 EXPORT_SYMBOL(bman_new_pool);
-- 
2.22.0



[powerpc:next-test 146/304] arch/powerpc/mm/book3s64/pkeys.c:284:20: error: unused function 'update_current_thread_amr'

2020-12-16 Thread kernel test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
next-test
head:   c15d1f9d03a0f4f68bf52dffdd541c8054e6de35
commit: 48a8ab4eeb8271f2a0e2ca3cf80844a59acca153 [146/304] 
powerpc/book3s64/pkeys: Don't update SPRN_AMR when in kernel mode.
config: powerpc-randconfig-r034-20201216 (attached as .config)
compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 
71601d2ac9954cb59c443cb3ae442cb106df35d4)
reproduce (this is a W=1 build):
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# install powerpc cross compiling tool for clang build
# apt-get install binutils-powerpc-linux-gnu
# 
https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git/commit/?id=48a8ab4eeb8271f2a0e2ca3cf80844a59acca153
git remote add powerpc 
https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git
git fetch --no-tags powerpc next-test
git checkout 48a8ab4eeb8271f2a0e2ca3cf80844a59acca153
# save the attached .config to linux build tree
COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross ARCH=powerpc 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 

All errors (new ones prefixed by >>):

>> arch/powerpc/mm/book3s64/pkeys.c:284:20: error: unused function 
>> 'update_current_thread_amr' [-Werror,-Wunused-function]
   static inline void update_current_thread_amr(u64 value)
  ^
>> arch/powerpc/mm/book3s64/pkeys.c:289:20: error: unused function 
>> 'update_current_thread_iamr' [-Werror,-Wunused-function]
   static inline void update_current_thread_iamr(u64 value)
  ^
   2 errors generated.


vim +/update_current_thread_amr +284 arch/powerpc/mm/book3s64/pkeys.c

   283  
 > 284  static inline void update_current_thread_amr(u64 value)
   285  {
   286  current->thread.regs->amr = value;
   287  }
   288  
 > 289  static inline void update_current_thread_iamr(u64 value)
   290  {
   291  if (!likely(pkey_execute_disable_supported))
   292  return;
   293  
   294  current->thread.regs->iamr = value;
   295  }
   296  
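
One conventional way to address this class of warning (an assumption, not
necessarily the fix that will land in the tree) is to move the helpers under
the #ifdef that uses them, or to annotate them, e.g.:

static inline void __maybe_unused update_current_thread_amr(u64 value)
{
	current->thread.regs->amr = value;
}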

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org




[RFC PATCH v1 3/7] powerpc/bpf: Move common helpers into bpf_jit.h

2020-12-16 Thread Christophe Leroy
Move bpf_flush_icache(), bpf_is_seen_register() and
bpf_set_seen_register() into bpf_jit.h so they can be reused by the
future bpf_jit_comp32.c.
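
For context, these helpers let the JIT spill/restore only the non-volatile
registers a program actually uses; roughly, based on the existing 64-bit
JIT's prologue (not code added by this patch):

static void save_seen_nvrs(u32 *image, struct codegen_context *ctx)
{
	int i;

	/* only back up BPF registers 6-10 that were marked as seen */
	for (i = BPF_REG_6; i <= BPF_REG_10; i++)
		if (bpf_is_seen_register(ctx, b2p[i]))
			PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i]));
}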

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/net/bpf_jit.h| 35 +++
 arch/powerpc/net/bpf_jit64.h  | 19 -
 arch/powerpc/net/bpf_jit_comp64.c | 16 --
 3 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index d0a67a1bbaf1..b8fa6908fc5e 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -108,6 +108,41 @@ static inline bool is_nearbranch(int offset)
 #define COND_LT(CR0_LT | COND_CMP_TRUE)
 #define COND_LE(CR0_GT | COND_CMP_FALSE)
 
+#define SEEN_FUNC  0x1000 /* might call external helpers */
+#define SEEN_STACK 0x2000 /* uses BPF stack */
+#define SEEN_TAILCALL  0x4000 /* uses tail calls */
+
+struct codegen_context {
+   /*
+* This is used to track register usage as well
+* as calls to external helpers.
+* - register usage is tracked with corresponding
+*   bits (r3-r10 and r27-r31)
+* - rest of the bits can be used to track other
+*   things -- for now, we use bits 16 to 23
+*   encoded in SEEN_* macros above
+*/
+   unsigned int seen;
+   unsigned int idx;
+   unsigned int stack_size;
+};
+
+static inline void bpf_flush_icache(void *start, void *end)
+{
+   smp_wmb();  /* smp write barrier */
+   flush_icache_range((unsigned long)start, (unsigned long)end);
+}
+
+static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
+{
+   return ctx->seen & (1 << (31 - i));
+}
+
+static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
+{
+   ctx->seen |= 1 << (31 - i);
+}
+
 #endif
 
 #endif
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index 2e33c6673ff9..b05f2e67bba1 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -86,25 +86,6 @@ static const int b2p[] = {
} while(0)
 #define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STDU(r, base, i)); } 
while(0)
 
-#define SEEN_FUNC  0x1000 /* might call external helpers */
-#define SEEN_STACK 0x2000 /* uses BPF stack */
-#define SEEN_TAILCALL  0x4000 /* uses tail calls */
-
-struct codegen_context {
-   /*
-* This is used to track register usage as well
-* as calls to external helpers.
-* - register usage is tracked with corresponding
-*   bits (r3-r10 and r27-r31)
-* - rest of the bits can be used to track other
-*   things -- for now, we use bits 16 to 23
-*   encoded in SEEN_* macros above
-*/
-   unsigned int seen;
-   unsigned int idx;
-   unsigned int stack_size;
-};
-
 #endif /* !__ASSEMBLY__ */
 
 #endif
diff --git a/arch/powerpc/net/bpf_jit_comp64.c 
b/arch/powerpc/net/bpf_jit_comp64.c
index 26a836a904f5..89599e75028c 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -23,22 +23,6 @@ static void bpf_jit_fill_ill_insns(void *area, unsigned int 
size)
memset32(area, BREAKPOINT_INSTRUCTION, size/4);
 }
 
-static inline void bpf_flush_icache(void *start, void *end)
-{
-   smp_wmb();
-   flush_icache_range((unsigned long)start, (unsigned long)end);
-}
-
-static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
-{
-   return ctx->seen & (1 << (31 - i));
-}
-
-static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
-{
-   ctx->seen |= 1 << (31 - i);
-}
-
 static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
 {
/*
-- 
2.25.0



[RFC PATCH v1 0/7] Implement EBPF on powerpc32

2020-12-16 Thread Christophe Leroy
This series implements extended BPF on powerpc32. For the implementation
details, see the last patch.

Below are the results on a powerpc 885 at 132 MHz:
- with the patch, with and without bpf_jit_enable
- without the patch, with bpf_jit_enable (ie with CBPF)

With the patch, with bpf_jit_enable = 1 :

[   44.221346] test_bpf: #0 TAX jited:1 961 959 959 PASS
[   44.226287] test_bpf: #1 TXA jited:1 353 351 351 PASS
[   44.230722] test_bpf: #2 ADD_SUB_MUL_K jited:1 398 PASS
[   44.232591] test_bpf: #3 DIV_MOD_KX jited:1 914 PASS
[   44.235080] test_bpf: #4 AND_OR_LSH_K jited:1 3838 411 PASS
[   44.240892] test_bpf: #5 LD_IMM_0 jited:1 376 PASS
[   44.242773] test_bpf: #6 LD_IND jited:1 1075 1071 3584 PASS
[   44.250251] test_bpf: #7 LD_ABS jited:1 1114 1110 1110 PASS
[   44.255316] test_bpf: #8 LD_ABS_LL jited:1 4166 1686 PASS
[   44.262888] test_bpf: #9 LD_IND_LL jited:1 1128 1125 2599 PASS
[   44.269519] test_bpf: #10 LD_ABS_NET jited:1 1682 1641 PASS
[   44.274515] test_bpf: #11 LD_IND_NET jited:1 3729 1102 1102 PASS
[   44.282221] test_bpf: #12 LD_PKTTYPE jited:1 558 555 PASS
[   44.284998] test_bpf: #13 LD_MARK jited:1 323 2721 PASS
[   44.289640] test_bpf: #14 LD_RXHASH jited:1 322 320 PASS
[   44.291845] test_bpf: #15 LD_QUEUE jited:1 323 321 PASS
[   44.294053] test_bpf: #16 LD_PROTOCOL jited:1 801 4107 PASS
[   44.300752] test_bpf: #17 LD_VLAN_TAG jited:1 323 321 PASS
[   44.302964] test_bpf: #18 LD_VLAN_TAG_PRESENT jited:1 338 336 PASS
[   44.305199] test_bpf: #19 LD_IFINDEX jited:1 3689 374 PASS
[   44.310877] test_bpf: #20 LD_HATYPE jited:1 376 374 PASS
[   44.313204] test_bpf: #21 LD_CPU jited:1 710 708 PASS
[   44.316235] test_bpf: #22 LD_NLATTR jited:1 687 1058 PASS
[   44.322924] test_bpf: #23 LD_NLATTR_NEST jited:1 4327 6120 PASS
[   44.335178] test_bpf: #24 LD_PAYLOAD_OFF jited:1 12677 14527 PASS
[   44.364105] test_bpf: #25 LD_ANC_XOR jited:1 353 351 PASS
[   44.366398] test_bpf: #26 SPILL_FILL jited:1 520 517 517 PASS
[   44.372132] test_bpf: #27 JEQ jited:1 1121 646 631 PASS
[   44.376289] test_bpf: #28 JGT jited:1 1121 647 632 PASS
[   44.382895] test_bpf: #29 JGE (jt 0), test 1 jited:1 1121 647 2118 PASS
[   44.388579] test_bpf: #30 JGE (jt 0), test 2 jited:1 649 646 631 PASS
[   44.392265] test_bpf: #31 JGE jited:1 946 1020 3417 PASS
[   44.399473] test_bpf: #32 JSET jited:1 976 1042 1232 PASS
[   44.404637] test_bpf: #33 tcpdump port 22 jited:1 3662 1772 2022 PASS
[   44.414350] test_bpf: #34 tcpdump complex jited:1 2617 1757 3071 PASS
[   44.424143] test_bpf: #35 RET_A jited:1 322 321 PASS
[   44.426350] test_bpf: #36 INT: ADD trivial jited:1 345 PASS
[   44.430558] test_bpf: #37 INT: MUL_X jited:1 383 PASS
[   44.432380] test_bpf: #38 INT: MUL_X2 jited:1 406 PASS
[   44.434232] test_bpf: #39 INT: MUL32_X jited:1 360 PASS
[   44.436037] test_bpf: #40 INT: ADD 64-bit jited:1 2794 PASS
[   44.444862] test_bpf: #41 INT: ADD 32-bit jited:1 4093 PASS
[   44.450735] test_bpf: #42 INT: SUB jited:1 2276 PASS
[   44.454694] test_bpf: #43 INT: XOR jited:1 3705 PASS
[   44.459986] test_bpf: #44 INT: MUL jited:1 2420 PASS
[   44.463988] test_bpf: #45 MOV REG64 jited:1 854 PASS
[   44.466341] test_bpf: #46 MOV REG32 jited:1 855 PASS
[   44.472037] test_bpf: #47 LD IMM64 jited:1 855 PASS
[   44.474396] test_bpf: #48 INT: ALU MIX jited:0 5480 PASS
[   44.480792] test_bpf: #49 INT: shifts by register jited:1 924 PASS
[   44.483214] test_bpf: #50 check: missing ret PASS
[   44.483871] test_bpf: #51 check: div_k_0 PASS
[   44.484544] test_bpf: #52 check: unknown insn PASS
[   44.485212] test_bpf: #53 check: out of range spill/fill PASS
[   44.485880] test_bpf: #54 JUMPS + HOLES jited:1 908 PASS
[   44.496498] test_bpf: #55 check: RET X PASS
[   44.498658] test_bpf: #56 check: LDX + RET X PASS
[   44.499329] test_bpf: #57 M[]: alt STX + LDX jited:1 1455 PASS
[   44.502833] test_bpf: #58 M[]: full STX + full LDX jited:1 1318 PASS
[   44.506072] test_bpf: #59 check: SKF_AD_MAX PASS
[   44.510956] test_bpf: #60 LD [SKF_AD_OFF-1] jited:1 1091 PASS
[   44.513593] test_bpf: #61 load 64-bit immediate jited:1 413 PASS
[   44.515475] test_bpf: #62 ALU_MOV_X: dst = 2 jited:1 3535 PASS
[   44.520461] test_bpf: #63 ALU_MOV_X: dst = 4294967295 jited:1 231 PASS
[   44.522115] test_bpf: #64 ALU64_MOV_X: dst = 2 jited:1 231 PASS
[   44.523783] test_bpf: #65 ALU64_MOV_X: dst = 4294967295 jited:1 231 PASS
[   44.525431] test_bpf: #66 ALU_MOV_K: dst = 2 jited:1  PASS
[   44.531319] test_bpf: #67 ALU_MOV_K: dst = 4294967295 jited:1 216 PASS
[   44.532955] test_bpf: #68 ALU_MOV_K: 0x = 0x 
jited:1 307 PASS
[   44.534697] test_bpf: #69 ALU64_MOV_K: dst = 2 jited:1 216 PASS
[   44.536327] test_bpf: #70 ALU64_MOV_K: dst = 2147483647 jited:1 224 PASS
[   44.542389] test_bpf: #71 ALU64_OR_K: dst = 0x0 jited:1 307 PASS
[   44.544136] test_bpf: #72 ALU64_MOV_K: dst = -1 jited:1 307 PASS
[   44.545880] test_bpf: #73 ALU_ADD_X: 1 + 2 = 3 jited:1 246 PASS
[   44.550913] test_bpf: #74 ALU_ADD_X: 

[RFC PATCH v1 1/7] powerpc/bpf: Remove classical BPF support for PPC32

2020-12-16 Thread Christophe Leroy
Currently, PPC32 has classical BPF support.

The test_bpf module exhibits some failures:

test_bpf: #298 LD_IND byte frag jited:1 ret 202 != 66 FAIL (1 times)
test_bpf: #299 LD_IND halfword frag jited:1 ret 51958 != 17220 FAIL (1 
times)
test_bpf: #301 LD_IND halfword mixed head/frag jited:1 ret 51958 != 
1305 FAIL (1 times)
test_bpf: #303 LD_ABS byte frag jited:1 ret 202 != 66 FAIL (1 times)
test_bpf: #304 LD_ABS halfword frag jited:1 ret 51958 != 17220 FAIL (1 
times)
test_bpf: #306 LD_ABS halfword mixed head/frag jited:1 ret 51958 != 
1305 FAIL (1 times)

test_bpf: Summary: 371 PASSED, 7 FAILED, [119/366 JIT'ed]

Fixing this is not worth the effort. Instead, remove support for
classical BPF and prepare for adding extended BPF support.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/Kconfig|   1 -
 arch/powerpc/net/Makefile   |   4 -
 arch/powerpc/net/bpf_jit32.h| 139 ---
 arch/powerpc/net/bpf_jit_asm.S  | 226 ---
 arch/powerpc/net/bpf_jit_comp.c | 683 
 5 files changed, 1053 deletions(-)
 delete mode 100644 arch/powerpc/net/bpf_jit32.h
 delete mode 100644 arch/powerpc/net/bpf_jit_asm.S
 delete mode 100644 arch/powerpc/net/bpf_jit_comp.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index cf328e199308..6d1454d31a53 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -191,7 +191,6 @@ config PPC
select HAVE_ARCH_TRACEHOOK
select HAVE_ASM_MODVERSIONS
select HAVE_C_RECORDMCOUNT
-   select HAVE_CBPF_JIT if !PPC64
select HAVE_STACKPROTECTOR  if PPC64 && 
$(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
select HAVE_STACKPROTECTOR  if PPC32 && 
$(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
select HAVE_CONTEXT_TRACKING if PPC64
diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile
index c2dec3a68d4c..52c939cef5b2 100644
--- a/arch/powerpc/net/Makefile
+++ b/arch/powerpc/net/Makefile
@@ -2,8 +2,4 @@
 #
 # Arch-specific network modules
 #
-ifdef CONFIG_PPC64
 obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o
-else
-obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o
-endif
diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h
deleted file mode 100644
index 448dfd4d98e1..
--- a/arch/powerpc/net/bpf_jit32.h
+++ /dev/null
@@ -1,139 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * bpf_jit32.h: BPF JIT compiler for PPC
- *
- * Copyright 2011 Matt Evans , IBM Corporation
- *
- * Split from bpf_jit.h
- */
-#ifndef _BPF_JIT32_H
-#define _BPF_JIT32_H
-
-#include 
-#include "bpf_jit.h"
-
-#ifdef CONFIG_PPC64
-#define BPF_PPC_STACK_R3_OFF   48
-#define BPF_PPC_STACK_LOCALS   32
-#define BPF_PPC_STACK_BASIC(48+64)
-#define BPF_PPC_STACK_SAVE (18*8)
-#define BPF_PPC_STACKFRAME (BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \
-BPF_PPC_STACK_SAVE)
-#define BPF_PPC_SLOWPATH_FRAME (48+64)
-#else
-#define BPF_PPC_STACK_R3_OFF   24
-#define BPF_PPC_STACK_LOCALS   16
-#define BPF_PPC_STACK_BASIC(24+32)
-#define BPF_PPC_STACK_SAVE (18*4)
-#define BPF_PPC_STACKFRAME (BPF_PPC_STACK_BASIC+BPF_PPC_STACK_LOCALS+ \
-BPF_PPC_STACK_SAVE)
-#define BPF_PPC_SLOWPATH_FRAME (24+32)
-#endif
-
-#define REG_SZ (BITS_PER_LONG/8)
-
-/*
- * Generated code register usage:
- *
- * As normal PPC C ABI (e.g. r1=sp, r2=TOC), with:
- *
- * skb r3  (Entry parameter)
- * A register  r4
- * X register  r5
- * addr param  r6
- * r7-r10  scratch
- * skb->data   r14
- * skb headlen r15 (skb->len - skb->data_len)
- * m[0]r16
- * m[...]  ...
- * m[15]   r31
- */
-#define r_skb  3
-#define r_ret  3
-#define r_A4
-#define r_X5
-#define r_addr 6
-#define r_scratch1 7
-#define r_scratch2 8
-#define r_D14
-#define r_HL   15
-#define r_M16
-
-#ifndef __ASSEMBLY__
-
-/*
- * Assembly helpers from arch/powerpc/net/bpf_jit.S:
- */
-#define DECLARE_LOAD_FUNC(func)\
-   extern u8 func[], func##_negative_offset[], func##_positive_offset[]
-
-DECLARE_LOAD_FUNC(sk_load_word);
-DECLARE_LOAD_FUNC(sk_load_half);
-DECLARE_LOAD_FUNC(sk_load_byte);
-DECLARE_LOAD_FUNC(sk_load_byte_msh);
-
-#define PPC_LBZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LBZ(r, 
base, i));   \
-   else {  EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i)));  \
-   EMIT(PPC_RAW_LBZ(r, r, IMM_L(i))); } } while(0)
-
-#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LD(r, base, 
i)); \
-   else {  EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i)));
\
-   EMIT(PPC_RAW_LD(r, r, IMM_L(i))); } } while(0)
-
-#define 

[PATCH] powerpc/44x: fix spelling mistake in Kconfig "varients" -> "variants"

2020-12-16 Thread Colin King
From: Colin Ian King 

There is a spelling mistake in the Kconfig help text. Fix it.

Signed-off-by: Colin Ian King 
---
 arch/powerpc/platforms/44x/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/44x/Kconfig 
b/arch/powerpc/platforms/44x/Kconfig
index 78ac6d67a935..71068d89fcaa 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -5,7 +5,7 @@ config PPC_47x
select MPIC
help
  This option enables support for the 47x family of processors and is
- not currently compatible with other 44x or 46x varients
+ not currently compatible with other 44x or 46x variants
 
 config BAMBOO
bool "Bamboo"
-- 
2.29.2



[PATCH v3 1/4] KVM: PPC: Allow nested guest creation when L0 hv_guest_state > L1

2020-12-16 Thread Ravi Bangoria
On powerpc, the L1 hypervisor takes the help of L0, using the H_ENTER_NESTED
hcall, to load L2 guest state into the CPU. The L1 hypervisor prepares the
L2 state in struct hv_guest_state and passes a pointer to it via the
hcall. Using that pointer, L0 reads/writes that state directly
from/to L1 memory. Thus L0 must be aware of L1's hv_guest_state
layout. Currently it uses the version field to achieve this, i.e. if
L0 hv_guest_state.version != L1 hv_guest_state.version, L0 won't
allow a nested kvm guest.

This restriction can be loosened up a bit. L0 can be taught to
understand an older layout of hv_guest_state, if we restrict new
members to be added only at the end, i.e. we can allow a
nested guest even when L0 hv_guest_state.version > L1
hv_guest_state.version. The other way around is still not
possible.

Signed-off-by: Ravi Bangoria 
Reviewed-by: Fabiano Rosas 
---
 arch/powerpc/include/asm/hvcall.h   | 17 +++--
 arch/powerpc/kvm/book3s_hv_nested.c | 55 +++--
 2 files changed, 60 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/include/asm/hvcall.h 
b/arch/powerpc/include/asm/hvcall.h
index c1fbccb04390..ca6840239f90 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -526,9 +526,12 @@ struct h_cpu_char_result {
u64 behaviour;
 };
 
-/* Register state for entering a nested guest with H_ENTER_NESTED */
+/*
+ * Register state for entering a nested guest with H_ENTER_NESTED.
+ * New member must be added at the end.
+ */
 struct hv_guest_state {
-   u64 version;/* version of this structure layout */
+   u64 version;/* version of this structure layout, must be 
first */
u32 lpid;
u32 vcpu_token;
/* These registers are hypervisor privileged (at least for writing) */
@@ -562,6 +565,16 @@ struct hv_guest_state {
 /* Latest version of hv_guest_state structure */
 #define HV_GUEST_STATE_VERSION 1
 
+static inline int hv_guest_state_size(unsigned int version)
+{
+   switch (version) {
+   case 1:
+   return offsetofend(struct hv_guest_state, ppr);
+   default:
+   return -1;
+   }
+}
+
 /*
  * From the document "H_GetPerformanceCounterInfo Interface" v1.07
  *
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c 
b/arch/powerpc/kvm/book3s_hv_nested.c
index 33b58549a9aa..937dd5114300 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -215,12 +215,51 @@ static void kvmhv_nested_mmio_needed(struct kvm_vcpu 
*vcpu, u64 regs_ptr)
}
 }
 
+static int kvmhv_read_guest_state_and_regs(struct kvm_vcpu *vcpu,
+  struct hv_guest_state *l2_hv,
+  struct pt_regs *l2_regs,
+  u64 hv_ptr, u64 regs_ptr)
+{
+   int size;
+
+   if (kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv->version,
+   sizeof(l2_hv->version)))
+   return -1;
+
+   if (kvmppc_need_byteswap(vcpu))
+   l2_hv->version = swab64(l2_hv->version);
+
+   size = hv_guest_state_size(l2_hv->version);
+   if (size < 0)
+   return -1;
+
+   return kvm_vcpu_read_guest(vcpu, hv_ptr, l2_hv, size) ||
+   kvm_vcpu_read_guest(vcpu, regs_ptr, l2_regs,
+   sizeof(struct pt_regs));
+}
+
+static int kvmhv_write_guest_state_and_regs(struct kvm_vcpu *vcpu,
+   struct hv_guest_state *l2_hv,
+   struct pt_regs *l2_regs,
+   u64 hv_ptr, u64 regs_ptr)
+{
+   int size;
+
+   size = hv_guest_state_size(l2_hv->version);
+   if (size < 0)
+   return -1;
+
+   return kvm_vcpu_write_guest(vcpu, hv_ptr, l2_hv, size) ||
+   kvm_vcpu_write_guest(vcpu, regs_ptr, l2_regs,
+sizeof(struct pt_regs));
+}
+
 long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
 {
long int err, r;
struct kvm_nested_guest *l2;
struct pt_regs l2_regs, saved_l1_regs;
-   struct hv_guest_state l2_hv, saved_l1_hv;
+   struct hv_guest_state l2_hv = {0}, saved_l1_hv;
struct kvmppc_vcore *vc = vcpu->arch.vcore;
u64 hv_ptr, regs_ptr;
u64 hdec_exp;
@@ -235,17 +274,15 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
hv_ptr = kvmppc_get_gpr(vcpu, 4);
regs_ptr = kvmppc_get_gpr(vcpu, 5);
vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
-   err = kvm_vcpu_read_guest(vcpu, hv_ptr, &l2_hv,
- sizeof(struct hv_guest_state)) ||
-   kvm_vcpu_read_guest(vcpu, regs_ptr, &l2_regs,
-   sizeof(struct pt_regs));
+   err = kvmhv_read_guest_state_and_regs(vcpu, &l2_hv, &l2_regs,
+ hv_ptr, regs_ptr);
srcu_read_unlock(&vcpu->kvm->srcu, 

[PATCH v3 4/4] KVM: PPC: Introduce new capability for 2nd DAWR

2020-12-16 Thread Ravi Bangoria
Introduce KVM_CAP_PPC_DAWR1, which can be used by Qemu to query whether
kvm supports the 2nd DAWR or not. The capability is disabled by default
even when the underlying CPU supports the 2nd DAWR. Qemu needs to check
and enable it manually to use the feature.
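
A hypothetical userspace sequence (illustration only; the eventual Qemu wiring
may differ):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

static int try_enable_dawr1(int vm_fd)
{
	struct kvm_enable_cap cap;

	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_DAWR1) <= 0)
		return -1;				/* host kvm has no 2nd DAWR */

	memset(&cap, 0, sizeof(cap));
	cap.cap = KVM_CAP_PPC_DAWR1;
	return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);	/* per-VM enable */
}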

Signed-off-by: Ravi Bangoria 
---
 Documentation/virt/kvm/api.rst | 10 ++
 arch/powerpc/include/asm/kvm_ppc.h |  1 +
 arch/powerpc/kvm/book3s_hv.c   | 12 
 arch/powerpc/kvm/powerpc.c | 10 ++
 include/uapi/linux/kvm.h   |  1 +
 tools/include/uapi/linux/kvm.h |  1 +
 6 files changed, 35 insertions(+)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index abb24575bdf9..049f07ebf197 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6016,6 +6016,16 @@ KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit 
notifications which user space
 can then handle to implement model specific MSR handling and/or user 
notifications
 to inform a user that an MSR was not handled.
 
+7.22 KVM_CAP_PPC_DAWR1
+--
+
+:Architectures: ppc
+:Parameters: none
+:Returns: 0 on success, -EINVAL when CPU doesn't support 2nd DAWR
+
+This capability can be used to check / enable 2nd DAWR feature provided
+by POWER10 processor.
+
 8. Other capabilities.
 ==
 
diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 0a056c64c317..13c39d24dda5 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -314,6 +314,7 @@ struct kvmppc_ops {
  int size);
int (*enable_svm)(struct kvm *kvm);
int (*svm_off)(struct kvm *kvm);
+   int (*enable_dawr1)(struct kvm *kvm);
 };
 
 extern struct kvmppc_ops *kvmppc_hv_ops;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index b7a30c0692a7..04c02344bd3f 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -5625,6 +5625,17 @@ static int kvmhv_svm_off(struct kvm *kvm)
return ret;
 }
 
+static int kvmhv_enable_dawr1(struct kvm *kvm)
+{
+   if (!cpu_has_feature(CPU_FTR_DAWR1))
+   return -ENODEV;
+
+   /* kvm == NULL means the caller is testing if the capability exists */
+   if (kvm)
+   kvm->arch.dawr1_enabled = true;
+   return 0;
+}
+
 static struct kvmppc_ops kvm_ops_hv = {
.get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv,
.set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv,
@@ -5668,6 +5679,7 @@ static struct kvmppc_ops kvm_ops_hv = {
.store_to_eaddr = kvmhv_store_to_eaddr,
.enable_svm = kvmhv_enable_svm,
.svm_off = kvmhv_svm_off,
+   .enable_dawr1 = kvmhv_enable_dawr1,
 };
 
 static int kvm_init_subcore_bitmap(void)
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 13999123b735..380656528b5b 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -678,6 +678,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = hv_enabled && kvmppc_hv_ops->enable_svm &&
!kvmppc_hv_ops->enable_svm(NULL);
break;
+   case KVM_CAP_PPC_DAWR1:
+   r = !!(hv_enabled && kvmppc_hv_ops->enable_dawr1 &&
+  !kvmppc_hv_ops->enable_dawr1(NULL));
+   break;
 #endif
default:
r = 0;
@@ -2187,6 +2191,12 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
break;
r = kvm->arch.kvm_ops->enable_svm(kvm);
break;
+   case KVM_CAP_PPC_DAWR1:
+   r = -EINVAL;
+   if (!is_kvmppc_hv_enabled(kvm) || 
!kvm->arch.kvm_ops->enable_dawr1)
+   break;
+   r = kvm->arch.kvm_ops->enable_dawr1(kvm);
+   break;
 #endif
default:
r = -EINVAL;
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index ca41220b40b8..f1210f99a52d 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1053,6 +1053,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_X86_USER_SPACE_MSR 188
 #define KVM_CAP_X86_MSR_FILTER 189
 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
+#define KVM_CAP_PPC_DAWR1 191
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index ca41220b40b8..f1210f99a52d 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1053,6 +1053,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_X86_USER_SPACE_MSR 188
 #define KVM_CAP_X86_MSR_FILTER 189
 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
+#define KVM_CAP_PPC_DAWR1 191
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
-- 
2.26.2



[PATCH v3 0/4] KVM: PPC: Power10 2nd DAWR enablement

2020-12-16 Thread Ravi Bangoria
Enable p10 2nd DAWR feature for Book3S kvm guest. DAWR is a hypervisor
resource and thus H_SET_MODE hcall is used to set/unset it. A new case
H_SET_MODE_RESOURCE_SET_DAWR1 is introduced in H_SET_MODE hcall for
setting/unsetting 2nd DAWR. Also, new capability KVM_CAP_PPC_DAWR1 has
been added to query 2nd DAWR support via kvm ioctl.

This feature also needs to be enabled in Qemu to really use it. I'll
post Qemu patches once kvm patches get accepted.

v2: 
https://lore.kernel.org/kvm/20201124105953.39325-1-ravi.bango...@linux.ibm.com

v2->v3:
 - Patch #1. If L0 version > L1, L0 hv_guest_state will contain some
   additional fields which won't be filled while reading from L1
   memory and thus they can contain garbage. Initialize l2_hv with 0s
   to avoid such situations.
 - Patch #3. Introduce per vm flag dawr1_enabled.
 - Patch #4. Instead of auto enabling KVM_CAP_PPC_DAWR1, let user check
   and enable it manually. Also move KVM_CAP_PPC_DAWR1 check / enable
   logic inside #if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE).
 - Explain KVM_CAP_PPC_DAWR1 in Documentation/virt/kvm/api.rst 
 - Rebased on top of 5.10-rc3.

v1->v2:
 - patch #1: New patch
 - patch #2: Don't rename KVM_REG_PPC_DAWR, it's an uapi macro
 - patch #3: Increment HV_GUEST_STATE_VERSION
 - Split kvm and selftests patches into different series
 - Patches rebased to paulus/kvm-ppc-next (cf59eb13e151) + few
   other watchpoint patches which are yet to be merged in
   paulus/kvm-ppc-next.

Ravi Bangoria (4):
  KVM: PPC: Allow nested guest creation when L0 hv_guest_state > L1
  KVM: PPC: Rename current DAWR macros and variables
  KVM: PPC: Add infrastructure to support 2nd DAWR
  KVM: PPC: Introduce new capability for 2nd DAWR

 Documentation/virt/kvm/api.rst| 12 
 arch/powerpc/include/asm/hvcall.h | 25 ++-
 arch/powerpc/include/asm/kvm_host.h   |  7 +-
 arch/powerpc/include/asm/kvm_ppc.h|  1 +
 arch/powerpc/include/uapi/asm/kvm.h   |  2 +
 arch/powerpc/kernel/asm-offsets.c |  6 +-
 arch/powerpc/kvm/book3s_hv.c  | 79 +++
 arch/powerpc/kvm/book3s_hv_nested.c   | 70 
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   | 43 +---
 arch/powerpc/kvm/powerpc.c| 10 +++
 include/uapi/linux/kvm.h  |  1 +
 tools/arch/powerpc/include/uapi/asm/kvm.h |  2 +
 tools/include/uapi/linux/kvm.h|  1 +
 13 files changed, 216 insertions(+), 43 deletions(-)

-- 
2.26.2



[PATCH v3 2/4] KVM: PPC: Rename current DAWR macros and variables

2020-12-16 Thread Ravi Bangoria
Power10 is introducing second DAWR. Use real register names (with
suffix 0) from ISA for current macros and variables used by kvm.
One exception is KVM_REG_PPC_DAWR. Keep it as it is because it's
uapi so changing it will break userspace.

Signed-off-by: Ravi Bangoria 
---
 arch/powerpc/include/asm/kvm_host.h |  4 ++--
 arch/powerpc/kernel/asm-offsets.c   |  4 ++--
 arch/powerpc/kvm/book3s_hv.c| 24 
 arch/powerpc/kvm/book3s_hv_nested.c |  8 
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 20 ++--
 5 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index d67a470e95a3..62cadf1a596e 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -584,8 +584,8 @@ struct kvm_vcpu_arch {
u32 ctrl;
u32 dabrx;
ulong dabr;
-   ulong dawr;
-   ulong dawrx;
+   ulong dawr0;
+   ulong dawrx0;
ulong ciabr;
ulong cfar;
ulong ppr;
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index c2722ff36e98..5a77aac516ba 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -548,8 +548,8 @@ int main(void)
OFFSET(VCPU_CTRL, kvm_vcpu, arch.ctrl);
OFFSET(VCPU_DABR, kvm_vcpu, arch.dabr);
OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx);
-   OFFSET(VCPU_DAWR, kvm_vcpu, arch.dawr);
-   OFFSET(VCPU_DAWRX, kvm_vcpu, arch.dawrx);
+   OFFSET(VCPU_DAWR0, kvm_vcpu, arch.dawr0);
+   OFFSET(VCPU_DAWRX0, kvm_vcpu, arch.dawrx0);
OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr);
OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags);
OFFSET(VCPU_DEC, kvm_vcpu, arch.dec);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index e3b1839fc251..bcbad8daa974 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -782,8 +782,8 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, 
unsigned long mflags,
return H_UNSUPPORTED_FLAG_START;
if (value2 & DABRX_HYP)
return H_P4;
-   vcpu->arch.dawr  = value1;
-   vcpu->arch.dawrx = value2;
+   vcpu->arch.dawr0  = value1;
+   vcpu->arch.dawrx0 = value2;
return H_SUCCESS;
case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE:
/* KVM does not support mflags=2 (AIL=2) */
@@ -1747,10 +1747,10 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, 
u64 id,
*val = get_reg_val(id, vcpu->arch.vcore->vtb);
break;
case KVM_REG_PPC_DAWR:
-   *val = get_reg_val(id, vcpu->arch.dawr);
+   *val = get_reg_val(id, vcpu->arch.dawr0);
break;
case KVM_REG_PPC_DAWRX:
-   *val = get_reg_val(id, vcpu->arch.dawrx);
+   *val = get_reg_val(id, vcpu->arch.dawrx0);
break;
case KVM_REG_PPC_CIABR:
*val = get_reg_val(id, vcpu->arch.ciabr);
@@ -1979,10 +1979,10 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, 
u64 id,
vcpu->arch.vcore->vtb = set_reg_val(id, *val);
break;
case KVM_REG_PPC_DAWR:
-   vcpu->arch.dawr = set_reg_val(id, *val);
+   vcpu->arch.dawr0 = set_reg_val(id, *val);
break;
case KVM_REG_PPC_DAWRX:
-   vcpu->arch.dawrx = set_reg_val(id, *val) & ~DAWRX_HYP;
+   vcpu->arch.dawrx0 = set_reg_val(id, *val) & ~DAWRX_HYP;
break;
case KVM_REG_PPC_CIABR:
vcpu->arch.ciabr = set_reg_val(id, *val);
@@ -3437,8 +3437,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu 
*vcpu, u64 time_limit,
int trap;
unsigned long host_hfscr = mfspr(SPRN_HFSCR);
unsigned long host_ciabr = mfspr(SPRN_CIABR);
-   unsigned long host_dawr = mfspr(SPRN_DAWR0);
-   unsigned long host_dawrx = mfspr(SPRN_DAWRX0);
+   unsigned long host_dawr0 = mfspr(SPRN_DAWR0);
+   unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0);
unsigned long host_psscr = mfspr(SPRN_PSSCR);
unsigned long host_pidr = mfspr(SPRN_PID);
 
@@ -3477,8 +3477,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu 
*vcpu, u64 time_limit,
mtspr(SPRN_SPURR, vcpu->arch.spurr);
 
if (dawr_enabled()) {
-   mtspr(SPRN_DAWR0, vcpu->arch.dawr);
-   mtspr(SPRN_DAWRX0, vcpu->arch.dawrx);
+   mtspr(SPRN_DAWR0, vcpu->arch.dawr0);
+   mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0);
}
mtspr(SPRN_CIABR, vcpu->arch.ciabr);
mtspr(SPRN_IC, vcpu->arch.ic);
@@ -3530,8 +3530,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu 
*vcpu, u64 time_limit,
  (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));

[PATCH v3 3/4] KVM: PPC: Add infrastructure to support 2nd DAWR

2020-12-16 Thread Ravi Bangoria
kvm code assumes single DAWR everywhere. Add code to support 2nd DAWR.
DAWR is a hypervisor resource and thus H_SET_MODE hcall is used to set/
unset it. Introduce new case H_SET_MODE_RESOURCE_SET_DAWR1 for 2nd DAWR.
Also, kvm will support 2nd DAWR only if CPU_FTR_DAWR1 is set.
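
For illustration (an assumption about the guest side, not code in this patch),
a pseries guest would program the second watchpoint through the same
H_SET_MODE path as DAWR0, e.g.:

static long guest_set_dawr1(unsigned long dawr, unsigned long dawrx)
{
	/* mflags = 0; the resource argument selects the second DAWR/DAWRX pair */
	return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR1, dawr, dawrx);
}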

Signed-off-by: Ravi Bangoria 
---
 Documentation/virt/kvm/api.rst|  2 ++
 arch/powerpc/include/asm/hvcall.h |  8 -
 arch/powerpc/include/asm/kvm_host.h   |  3 ++
 arch/powerpc/include/uapi/asm/kvm.h   |  2 ++
 arch/powerpc/kernel/asm-offsets.c |  2 ++
 arch/powerpc/kvm/book3s_hv.c  | 43 +++
 arch/powerpc/kvm/book3s_hv_nested.c   |  7 
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   | 23 
 tools/arch/powerpc/include/uapi/asm/kvm.h |  2 ++
 9 files changed, 91 insertions(+), 1 deletion(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 36d5f1f3c6dd..abb24575bdf9 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -2249,6 +2249,8 @@ registers, find a list below:
   PPC KVM_REG_PPC_PSSCR   64
   PPC KVM_REG_PPC_DEC_EXPIRY  64
   PPC KVM_REG_PPC_PTCR64
+  PPC KVM_REG_PPC_DAWR1   64
+  PPC KVM_REG_PPC_DAWRX1  64
   PPC KVM_REG_PPC_TM_GPR0 64
   ...
   PPC KVM_REG_PPC_TM_GPR3164
diff --git a/arch/powerpc/include/asm/hvcall.h 
b/arch/powerpc/include/asm/hvcall.h
index ca6840239f90..98afa58b619a 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -560,16 +560,22 @@ struct hv_guest_state {
u64 pidr;
u64 cfar;
u64 ppr;
+   /* Version 1 ends here */
+   u64 dawr1;
+   u64 dawrx1;
+   /* Version 2 ends here */
 };
 
 /* Latest version of hv_guest_state structure */
-#define HV_GUEST_STATE_VERSION 1
+#define HV_GUEST_STATE_VERSION 2
 
 static inline int hv_guest_state_size(unsigned int version)
 {
switch (version) {
case 1:
return offsetofend(struct hv_guest_state, ppr);
+   case 2:
+   return offsetofend(struct hv_guest_state, dawrx1);
default:
return -1;
}
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 62cadf1a596e..a93cfb672421 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -307,6 +307,7 @@ struct kvm_arch {
u8 svm_enabled;
bool threads_indep;
bool nested_enable;
+   bool dawr1_enabled;
pgd_t *pgtable;
u64 process_table;
struct dentry *debugfs_dir;
@@ -586,6 +587,8 @@ struct kvm_vcpu_arch {
ulong dabr;
ulong dawr0;
ulong dawrx0;
+   ulong dawr1;
+   ulong dawrx1;
ulong ciabr;
ulong cfar;
ulong ppr;
diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
b/arch/powerpc/include/uapi/asm/kvm.h
index c3af3f324c5a..9f18fa090f1f 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -644,6 +644,8 @@ struct kvm_ppc_cpu_char {
 #define KVM_REG_PPC_MMCR3  (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1)
 #define KVM_REG_PPC_SIER2  (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2)
 #define KVM_REG_PPC_SIER3  (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3)
+#define KVM_REG_PPC_DAWR1  (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4)
+#define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5)
 
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 5a77aac516ba..a35ea4e19360 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -550,6 +550,8 @@ int main(void)
OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx);
OFFSET(VCPU_DAWR0, kvm_vcpu, arch.dawr0);
OFFSET(VCPU_DAWRX0, kvm_vcpu, arch.dawrx0);
+   OFFSET(VCPU_DAWR1, kvm_vcpu, arch.dawr1);
+   OFFSET(VCPU_DAWRX1, kvm_vcpu, arch.dawrx1);
OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr);
OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags);
OFFSET(VCPU_DEC, kvm_vcpu, arch.dec);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index bcbad8daa974..b7a30c0692a7 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -785,6 +785,22 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, 
unsigned long mflags,
vcpu->arch.dawr0  = value1;
vcpu->arch.dawrx0 = value2;
return H_SUCCESS;
+   case H_SET_MODE_RESOURCE_SET_DAWR1:
+   if (!kvmppc_power8_compatible(vcpu))
+   return H_P2;
+   if (!ppc_breakpoint_available())
+   return H_P2;
+   if (!cpu_has_feature(CPU_FTR_DAWR1))
+   return 

Re: [PATCH v3 04/19] powerpc/perf: move perf irq/nmi handling details into traps.c

2020-12-16 Thread Athira Rajeev
On 28-Nov-2020, at 8:10 PM, Nicholas Piggin  wrote:

This is required in order to allow more significant differences between
NMI type interrupt handlers and regular asynchronous handlers.

Signed-off-by: Nicholas Piggin 

Reviewed this patch and the changes look good to me.

Reviewed-by: Athira Rajeev 

Thanks
Athira

---
 arch/powerpc/kernel/traps.c  | 31 +++-
 arch/powerpc/perf/core-book3s.c  | 35 ++--
 arch/powerpc/perf/core-fsl-emb.c | 25 ---
 3 files changed, 32 insertions(+), 59 deletions(-)

diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 902fcbd1a778..7dda72eb97cc 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1919,11 +1919,40 @@ void vsx_unavailable_tm(struct pt_regs *regs)
 }
 #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
 
-void performance_monitor_exception(struct pt_regs *regs)
+static void performance_monitor_exception_nmi(struct pt_regs *regs)
+{
+        nmi_enter();
+
+        __this_cpu_inc(irq_stat.pmu_irqs);
+
+        perf_irq(regs);
+
+        nmi_exit();
+}
+
+static void performance_monitor_exception_async(struct pt_regs *regs)
 {
+        irq_enter();
+
         __this_cpu_inc(irq_stat.pmu_irqs);
 
         perf_irq(regs);
+
+        irq_exit();
+}
+
+void performance_monitor_exception(struct pt_regs *regs)
+{
+        /*
+         * On 64-bit, if perf interrupts hit in a local_irq_disable
+         * (soft-masked) region, we consider them as NMIs. This is required to
+         * prevent hash faults on user addresses when reading callchains (and
+         * looks better from an irq tracing perspective).
+         */
+        if (IS_ENABLED(CONFIG_PPC64) && unlikely(arch_irq_disabled_regs(regs)))
+                performance_monitor_exception_nmi(regs);
+        else
+                performance_monitor_exception_async(regs);
 }
 
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 08643cba1494..9fd8cae09218 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -109,10 +109,6 @@ static inline void perf_read_regs(struct pt_regs *regs)
 {
         regs->result = 0;
 }
-static inline int perf_intr_is_nmi(struct pt_regs *regs)
-{
-        return 0;
-}
 
 static inline int siar_valid(struct pt_regs *regs)
 {
@@ -328,15 +324,6 @@ static inline void perf_read_regs(struct pt_regs *regs)
         regs->result = use_siar;
 }
 
-/*
- * If interrupts were soft-disabled when a PMU interrupt occurs, treat
- * it as an NMI.
- */
-static inline int perf_intr_is_nmi(struct pt_regs *regs)
-{
-        return (regs->softe & IRQS_DISABLED);
-}
-
 /*
  * On processors like P7+ that have the SIAR-Valid bit, marked instructions
  * must be sampled only if the SIAR-valid bit is set.
@@ -2224,7 +2211,6 @@ static void __perf_event_interrupt(struct pt_regs *regs)
         struct perf_event *event;
         unsigned long val[8];
         int found, active;
-        int nmi;
 
         if (cpuhw->n_limited)
                 freeze_limited_counters(cpuhw, mfspr(SPRN_PMC5),
@@ -2232,18 +2218,6 @@ static void __perf_event_interrupt(struct pt_regs *regs)
 
         perf_read_regs(regs);
 
-        /*
-         * If perf interrupts hit in a local_irq_disable (soft-masked) region,
-         * we consider them as NMIs. This is required to prevent hash faults on
-         * user addresses when reading callchains. See the NMI test in
-         * do_hash_page.
-         */
-        nmi = perf_intr_is_nmi(regs);
-        if (nmi)
-                nmi_enter();
-        else
-                irq_enter();
-
         /* Read all the PMCs since we'll need them a bunch of times */
         for (i = 0; i < ppmu->n_counter; ++i)
                 val[i] = read_pmc(i + 1);
@@ -2289,8 +2263,8 @@ static void __perf_event_interrupt(struct pt_regs *regs)
                         }
                 }
         }
-        if (!found && !nmi && printk_ratelimit())
-                printk(KERN_WARNING "Can't find PMC that caused IRQ\n");
+        if (unlikely(!found) && !arch_irq_disabled_regs(regs))
+                printk_ratelimited(KERN_WARNING "Can't find PMC that caused IRQ\n");
 
         /*
          * Reset MMCR0 to its normal value.  This will set PMXE and
@@ -2300,11 +2274,6 @@ static void __perf_event_interrupt(struct pt_regs *regs)
          * we get back out of this interrupt.
          */
         write_mmcr0(cpuhw, cpuhw->mmcr.mmcr0);
-
-        if (nmi)
-                nmi_exit();
-        else
-                irq_exit();
 }
 
 static void perf_event_interrupt(struct pt_regs *regs)
diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c
index e0e7e276bfd2..ee721f420a7b 100644
--- a/arch/powerpc/perf/core-fsl-emb.c
+++ b/arch/powerpc/perf/core-fsl-emb.c
@@ -31,19 +31,6 @@ static atomic_t num_events;
 /* Used to avoid races in calling reserve/release_pmc_hardware */
 static DEFINE_MUTEX(pmc_reserve_mutex);
 
-/*
- * If interrupts were soft-disabled when a PMU interrupt occurs, treat
- * it as an NMI.
- */
-static inline int perf_intr_is_nmi(struct pt_regs *regs)
-{
-#ifdef __powerpc64__
-        return (regs->softe & IRQS_DISABLED);
-#else
-        return 0;
-#endif
-}
-
 static void perf_event_interrupt(struct pt_regs *regs);
 
 /*
@@ -659,13 +646,6 @@ static void perf_event_interrupt(struct pt_regs *regs)
         struct perf_event *event;
         unsigned long val;
         int found = 0;
-        int nmi;
-
-        nmi = perf_intr_is_nmi(regs);
-        if (nmi)
-                nmi_enter();
-        else
-                irq_enter();
 
         for (i = 0; i < ppmu->n_counter; ++i) {
                 event = cpuhw->event[i];
@@ -690,11 +670,6 @@ static void perf_event_interrupt(struct pt_regs *regs)

[RFC PATCH v1 7/7] powerpc/bpf: Implement extended BPF on PPC32

2020-12-16 Thread Christophe Leroy
Implement Extended Berkeley Packet Filter on Powerpc 32

Test result with test_bpf module:

test_bpf: Summary: 378 PASSED, 0 FAILED, [354/366 JIT'ed]

Registers mapping:

[BPF_REG_0] = r11-r12
/* function arguments */
[BPF_REG_1] = r3-r4
[BPF_REG_2] = r5-r6
[BPF_REG_3] = r7-r8
[BPF_REG_4] = r9-r10
[BPF_REG_5] = r21-r22 (Args 9 and 10 come in via the stack)
/* non volatile registers */
[BPF_REG_6] = r23-r24
[BPF_REG_7] = r25-r26
[BPF_REG_8] = r27-r28
[BPF_REG_9] = r29-r30
/* frame pointer aka BPF_REG_10 */
[BPF_REG_FP] = r31
/* eBPF jit internal registers */
[BPF_REG_AX] = r19-r20
[TMP_REG] = r18

As PPC32 doesn't have a redzone in the stack,
use r17 as the tail call counter.

r0 is used as a temporary register as much as possible. It is referenced
directly in the code in order to avoid misuse, because some
instructions interpret it as the value 0 instead of register r0
(e.g. addi, addis, stw, lwz, ...).
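
To make the r0 caveat concrete, here is an illustrative fragment (not taken
from the patch) using the EMIT()/PPC_RAW_*() helpers referenced elsewhere in
this series; the register numbers are arbitrary examples:

EMIT(PPC_RAW_LI(0, 42));        /* li r0,42 is addi r0,0,42: RT may be r0        */
EMIT(PPC_RAW_ADDI(3, 0, 8));    /* addi r3,0,8: RA=0 means the value 0, so       */
                                /* r3 = 8, NOT r0 + 8                            */
EMIT(PPC_RAW_ADD(3, 3, 0));     /* add r3,r3,r0: X-form, here r0 really is GPR 0 */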

The following operations are not implemented:

case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
case BPF_STX | BPF_XADD | BPF_DW: /* *(u64 *)(dst + off) += src */

The following operations are only implemented for power-of-two constants
(see the sketch after the list):

case BPF_ALU64 | BPF_MOD | BPF_K: /* dst %= imm */
case BPF_ALU64 | BPF_DIV | BPF_K: /* dst /= imm */
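
The power-of-two restriction works because those two cases degenerate into a
mask and a shift, so no 64-bit division helper is needed. A sketch of the idea
in C (illustrative only, not the actual JIT emission; ilog2() and the integer
types come from linux/log2.h and linux/types.h):

/* dst op= imm for imm == 2^n, the only BPF_ALU64 MOD/DIV | BPF_K handled. */
static bool pow2_divmod_sketch(u64 *dst, u32 imm, bool is_mod)
{
        if (imm == 0 || (imm & (imm - 1)))
                return false;           /* not a power of two: still needs a helper call */

        if (is_mod)
                *dst &= imm - 1;        /* dst %= imm  ->  keep the low bits   */
        else
                *dst >>= ilog2(imm);    /* dst /= imm  ->  logical shift right */
        return true;
}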

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/Kconfig  |2 +-
 arch/powerpc/net/Makefile |2 +-
 arch/powerpc/net/bpf_jit.h|4 +
 arch/powerpc/net/bpf_jit32.h  |   58 ++
 arch/powerpc/net/bpf_jit_comp32.c | 1020 +
 5 files changed, 1084 insertions(+), 2 deletions(-)
 create mode 100644 arch/powerpc/net/bpf_jit32.h
 create mode 100644 arch/powerpc/net/bpf_jit_comp32.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 6d1454d31a53..e09d0bfed843 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -199,7 +199,7 @@ config PPC
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DYNAMIC_FTRACE
select HAVE_DYNAMIC_FTRACE_WITH_REGSif MPROFILE_KERNEL
-   select HAVE_EBPF_JITif PPC64
+   select HAVE_EBPF_JIT
select HAVE_EFFICIENT_UNALIGNED_ACCESS  if !(CPU_LITTLE_ENDIAN && 
POWER7_CPU)
select HAVE_FAST_GUP
select HAVE_FTRACE_MCOUNT_RECORD
diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile
index 969cde177880..8e60af32e51e 100644
--- a/arch/powerpc/net/Makefile
+++ b/arch/powerpc/net/Makefile
@@ -2,4 +2,4 @@
 #
 # Arch-specific network modules
 #
-obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_jit_comp64.o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_jit_comp$(BITS).o
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index fb4656986fb9..a45b8266355d 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -42,6 +42,10 @@
EMIT(PPC_RAW_ORI(d, d, IMM_L(i)));\
} } while(0)
 
+#ifdef CONFIG_PPC32
+#define PPC_EX32(r, i) EMIT(PPC_RAW_LI((r), (i) < 0 ? -1 : 0))
+#endif
+
 #define PPC_LI64(d, i) do {  \
if ((long)(i) >= -2147483648 &&   \
(long)(i) < 2147483648)   \
diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h
new file mode 100644
index ..3e8149f45368
--- /dev/null
+++ b/arch/powerpc/net/bpf_jit32.h
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * BPF JIT compiler for PPC32
+ *
+ */
+#ifndef _BPF_JIT32_H
+#define _BPF_JIT32_H
+
+#include "bpf_jit.h"
+
+/*
+ * Stack layout:
+ *
+ * [   prev sp ] <-
+ * [   nv gpr save area] 16 * 4|
+ * fp (r31) -->[   ebpf stack space] upto 512  |
+ * [ frame header  ] 16|
+ * sp (r1) --->[stack pointer  ] --
+ */
+
+/* for gpr non volatile registers r18 to r31 (14) + r17 for tail call + 
alignment */
+#define BPF_PPC_STACK_SAVE (14 * 4 + 4 + 4)
+/* stack frame, ensure this is quadword aligned */
+#define BPF_PPC_STACKFRAME(ctx)(STACK_FRAME_MIN_SIZE + 
BPF_PPC_STACK_SAVE + (ctx)->stack_size)
+
+#ifndef __ASSEMBLY__
+
+/* BPF register usage */
+#define TMP_REG(MAX_BPF_JIT_REG + 0)
+
+/* BPF to ppc register mappings */
+static const int b2p[] = {
+   /* function return value */
+   [BPF_REG_0] = 12,
+   /* function arguments */
+   [BPF_REG_1] = 4,
+   [BPF_REG_2] = 6,
+   [BPF_REG_3] = 8,
+   [BPF_REG_4] = 10,
+   [BPF_REG_5] = 22,
+   /* non volatile registers */
+   [BPF_REG_6] = 24,
+   [BPF_REG_7] = 26,
+  

[RFC PATCH v1 5/7] powerpc/bpf: Change values of SEEN_ flags

2020-12-16 Thread Christophe Leroy
Because PPC32 will use more non-volatile registers,
move the SEEN_ flags to positions 0-2, which correspond to the special
registers.
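
As a reading aid (not part of the patch): with register usage tracked at bit
(31 - n) of ctx->seen, the flags now occupy the bits of r0-r2, which are never
tracked as ordinary BPF registers. The helper name below is hypothetical; the
real helpers are bpf_set_seen_register()/bpf_is_seen_register().

/*
 * bit 31 (r0) -> SEEN_TAILCALL 0x80000000
 * bit 30 (r1) -> SEEN_STACK    0x40000000
 * bit 29 (r2) -> SEEN_FUNC     0x20000000
 * bits 28..0  -> r3 .. r31
 */
static inline bool seen_gpr(struct codegen_context *ctx, int gpr)
{
        return ctx->seen & (1U << (31 - gpr)); /* meaningful for r3..r31 */
}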

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/net/bpf_jit.h | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index b34abfce15a6..fb4656986fb9 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -108,18 +108,18 @@ static inline bool is_nearbranch(int offset)
 #define COND_LT(CR0_LT | COND_CMP_TRUE)
 #define COND_LE(CR0_GT | COND_CMP_FALSE)
 
-#define SEEN_FUNC  0x1000 /* might call external helpers */
-#define SEEN_STACK 0x2000 /* uses BPF stack */
-#define SEEN_TAILCALL  0x4000 /* uses tail calls */
+#define SEEN_FUNC  0x20000000 /* might call external helpers */
+#define SEEN_STACK 0x40000000 /* uses BPF stack */
+#define SEEN_TAILCALL  0x80000000 /* uses tail calls */
 
 struct codegen_context {
/*
 * This is used to track register usage as well
 * as calls to external helpers.
 * - register usage is tracked with corresponding
-*   bits (r3-r10 and r27-r31)
+*   bits (r3-r31)
 * - rest of the bits can be used to track other
-*   things -- for now, we use bits 16 to 23
+*   things -- for now, we use bits 0 to 2
 *   encoded in SEEN_* macros above
 */
unsigned int seen;
-- 
2.25.0



[RFC PATCH v1 6/7] powerpc/asm: Add some opcodes in asm/ppc-opcode.h for PPC32 eBPF

2020-12-16 Thread Christophe Leroy
The following opcodes will be needed for the implementation
of eBPF for PPC32. Add them in asm/ppc-opcode.h (a short usage
sketch follows the list).

PPC_RAW_ADDE
PPC_RAW_ADDZE
PPC_RAW_ADDME
PPC_RAW_MFLR
PPC_RAW_ADDIC
PPC_RAW_ADDIC_DOT
PPC_RAW_SUBFC
PPC_RAW_SUBFE
PPC_RAW_SUBFIC
PPC_RAW_SUBFZE
PPC_RAW_ANDIS
PPC_RAW_NOR
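
The carry forms are what let the PPC32 JIT synthesize 64-bit ALU operations on
32-bit register pairs. A hedged sketch (not from the patch) of BPF_ALU64
dst += src using the new macros; dst_hi/dst_lo/src_hi/src_lo are placeholder
register numbers standing for the pairs of patch 7/7:

EMIT(PPC_RAW_ADDC(dst_lo, dst_lo, src_lo));     /* low words: add, set carry  */
EMIT(PPC_RAW_ADDE(dst_hi, dst_hi, src_hi));     /* high words: add with carry */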

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/ppc-opcode.h | 12 
 1 file changed, 12 insertions(+)

diff --git a/arch/powerpc/include/asm/ppc-opcode.h 
b/arch/powerpc/include/asm/ppc-opcode.h
index ed161ef2b3ca..5b60020dc1f4 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -437,6 +437,9 @@
 #define PPC_RAW_STFDX(s, a, b) (0x7c0005ae | ___PPC_RS(s) | 
___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_RAW_LVX(t, a, b)   (0x7cce | ___PPC_RT(t) | 
___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_RAW_STVX(s, a, b)  (0x7c0001ce | ___PPC_RS(s) | 
___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_ADDE(t, a, b)  (0x7c000114 | ___PPC_RT(t) | 
___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_ADDZE(t, a)(0x7c000194 | ___PPC_RT(t) | 
___PPC_RA(a))
+#define PPC_RAW_ADDME(t, a)(0x7c0001d4 | ___PPC_RT(t) | 
___PPC_RA(a))
 #define PPC_RAW_ADD(t, a, b)   (PPC_INST_ADD | ___PPC_RT(t) | 
___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_RAW_ADD_DOT(t, a, b)   (PPC_INST_ADD | ___PPC_RT(t) | 
___PPC_RA(a) | ___PPC_RB(b) | 0x1)
 #define PPC_RAW_ADDC(t, a, b)  (0x7c14 | ___PPC_RT(t) | 
___PPC_RA(a) | ___PPC_RB(b))
@@ -445,11 +448,14 @@
 #define PPC_RAW_BLR()  (PPC_INST_BLR)
 #define PPC_RAW_BLRL() (0x4e800021)
 #define PPC_RAW_MTLR(r)(0x7c0803a6 | ___PPC_RT(r))
+#define PPC_RAW_MFLR(t)(PPC_INST_MFLR | ___PPC_RT(t))
 #define PPC_RAW_BCTR() (PPC_INST_BCTR)
 #define PPC_RAW_MTCTR(r)   (PPC_INST_MTCTR | ___PPC_RT(r))
 #define PPC_RAW_ADDI(d, a, i)  (PPC_INST_ADDI | ___PPC_RT(d) | 
___PPC_RA(a) | IMM_L(i))
 #define PPC_RAW_LI(r, i)   PPC_RAW_ADDI(r, 0, i)
 #define PPC_RAW_ADDIS(d, a, i) (PPC_INST_ADDIS | ___PPC_RT(d) | 
___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_ADDIC(d, a, i) (0x3000 | ___PPC_RT(d) | 
___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_ADDIC_DOT(d, a, i) (0x3400 | ___PPC_RT(d) | 
___PPC_RA(a) | IMM_L(i))
 #define PPC_RAW_LIS(r, i)  PPC_RAW_ADDIS(r, 0, i)
 #define PPC_RAW_STDX(r, base, b)   (0x7c00012a | ___PPC_RS(r) | 
___PPC_RA(base) | ___PPC_RB(b))
 #define PPC_RAW_STDU(r, base, i)   (0xf801 | ___PPC_RS(r) | 
___PPC_RA(base) | ((i) & 0xfffc))
@@ -472,6 +478,10 @@
 #define PPC_RAW_CMPLW(a, b)(0x7c40 | ___PPC_RA(a) | 
___PPC_RB(b))
 #define PPC_RAW_CMPLD(a, b)(0x7c200040 | ___PPC_RA(a) | 
___PPC_RB(b))
 #define PPC_RAW_SUB(d, a, b)   (0x7c50 | ___PPC_RT(d) | 
___PPC_RB(a) | ___PPC_RA(b))
+#define PPC_RAW_SUBFC(d, a, b) (0x7c10 | ___PPC_RT(d) | 
___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_SUBFE(d, a, b) (0x7c000110 | ___PPC_RT(d) | 
___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_SUBFIC(d, a, i)(0x2000 | ___PPC_RT(d) | 
___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_SUBFZE(d, a)   (0x7c000190 | ___PPC_RT(d) | 
___PPC_RA(a))
 #define PPC_RAW_MULD(d, a, b)  (0x7c0001d2 | ___PPC_RT(d) | 
___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_RAW_MULW(d, a, b)  (0x7c0001d6 | ___PPC_RT(d) | 
___PPC_RA(a) | ___PPC_RB(b))
 #define PPC_RAW_MULHWU(d, a, b)(0x7c16 | ___PPC_RT(d) | 
___PPC_RA(a) | ___PPC_RB(b))
@@ -484,11 +494,13 @@
 #define PPC_RAW_DIVDEU_DOT(t, a, b)(0x7c000312 | ___PPC_RT(t) | 
___PPC_RA(a) | ___PPC_RB(b) | 0x1)
 #define PPC_RAW_AND(d, a, b)   (0x7c38 | ___PPC_RA(d) | 
___PPC_RS(a) | ___PPC_RB(b))
 #define PPC_RAW_ANDI(d, a, i)  (0x7000 | ___PPC_RA(d) | 
___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_ANDIS(d, a, i) (0x7400 | ___PPC_RA(d) | 
___PPC_RS(a) | IMM_L(i))
 #define PPC_RAW_AND_DOT(d, a, b)   (0x7c39 | ___PPC_RA(d) | 
___PPC_RS(a) | ___PPC_RB(b))
 #define PPC_RAW_OR(d, a, b)(0x7c000378 | ___PPC_RA(d) | 
___PPC_RS(a) | ___PPC_RB(b))
 #define PPC_RAW_MR(d, a)   PPC_RAW_OR(d, a, a)
 #define PPC_RAW_ORI(d, a, i)   (PPC_INST_ORI | ___PPC_RA(d) | 
___PPC_RS(a) | IMM_L(i))
 #define PPC_RAW_ORIS(d, a, i)  (PPC_INST_ORIS | ___PPC_RA(d) | 
___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_NOR(d, a, b)   (0x7cf8 | ___PPC_RA(d) | 
___PPC_RS(a) | ___PPC_RB(b))
 #define PPC_RAW_XOR(d, a, b)   (0x7c000278 | ___PPC_RA(d) | 
___PPC_RS(a) | ___PPC_RB(b))
 #define PPC_RAW_XORI(d, a, i)  (0x6800 | ___PPC_RA(d) | 
___PPC_RS(a) | IMM_L(i))
 #define PPC_RAW_XORIS(d, a, i) (0x6c00 | ___PPC_RA(d) | 
___PPC_RS(a) | IMM_L(i))
-- 
2.25.0



[RFC PATCH v1 2/7] powerpc/bpf: Change register numbering for bpf_set/is_seen_register()

2020-12-16 Thread Christophe Leroy
Instead of using the BPF register number as input to
bpf_set_seen_register() and bpf_is_seen_register(), use the
CPU register number directly.
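
In short (an illustrative summary of the change below, reusing the existing
b2p[] mapping): callers now do the b2p[] lookup themselves, so any CPU
register can be tracked, not only those backing a BPF register:

/* before */ bpf_set_seen_register(ctx, insn[i].dst_reg);       /* b2p[] applied inside  */
/* after  */ bpf_set_seen_register(ctx, b2p[insn[i].dst_reg]);  /* caller passes the GPR */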

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/net/bpf_jit_comp64.c | 16 
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/net/bpf_jit_comp64.c 
b/arch/powerpc/net/bpf_jit_comp64.c
index 022103c6a201..26a836a904f5 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -31,12 +31,12 @@ static inline void bpf_flush_icache(void *start, void *end)
 
 static inline bool bpf_is_seen_register(struct codegen_context *ctx, int i)
 {
-   return (ctx->seen & (1 << (31 - b2p[i])));
+   return ctx->seen & (1 << (31 - i));
 }
 
 static inline void bpf_set_seen_register(struct codegen_context *ctx, int i)
 {
-   ctx->seen |= (1 << (31 - b2p[i]));
+   ctx->seen |= 1 << (31 - i);
 }
 
 static inline bool bpf_has_stack_frame(struct codegen_context *ctx)
@@ -47,7 +47,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context 
*ctx)
 * - the bpf program uses its stack area
 * The latter condition is deduced from the usage of BPF_REG_FP
 */
-   return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, BPF_REG_FP);
+   return ctx->seen & SEEN_FUNC || bpf_is_seen_register(ctx, 
b2p[BPF_REG_FP]);
 }
 
 /*
@@ -124,11 +124,11 @@ static void bpf_jit_build_prologue(u32 *image, struct 
codegen_context *ctx)
 * in the protected zone below the previous stack frame
 */
for (i = BPF_REG_6; i <= BPF_REG_10; i++)
-   if (bpf_is_seen_register(ctx, i))
+   if (bpf_is_seen_register(ctx, b2p[i]))
PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, 
b2p[i]));
 
/* Setup frame pointer to point to the bpf stack area */
-   if (bpf_is_seen_register(ctx, BPF_REG_FP))
+   if (bpf_is_seen_register(ctx, b2p[BPF_REG_FP]))
EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], 1,
STACK_FRAME_MIN_SIZE + ctx->stack_size));
 }
@@ -139,7 +139,7 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct 
codegen_context *ctx
 
/* Restore NVRs */
for (i = BPF_REG_6; i <= BPF_REG_10; i++)
-   if (bpf_is_seen_register(ctx, i))
+   if (bpf_is_seen_register(ctx, b2p[i]))
PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, 
b2p[i]));
 
/* Tear down our stack frame */
@@ -330,9 +330,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 
*image,
 * any issues.
 */
if (dst_reg >= BPF_PPC_NVR_MIN && dst_reg < 32)
-   bpf_set_seen_register(ctx, insn[i].dst_reg);
+   bpf_set_seen_register(ctx, dst_reg);
if (src_reg >= BPF_PPC_NVR_MIN && src_reg < 32)
-   bpf_set_seen_register(ctx, insn[i].src_reg);
+   bpf_set_seen_register(ctx, src_reg);
 
switch (code) {
/*
-- 
2.25.0



[RFC PATCH v1 4/7] powerpc/bpf: Move common functions into bpf_jit_comp.c

2020-12-16 Thread Christophe Leroy
Move into bpf_jit_comp.c the functions that will remain common to
PPC64 and PPC32 when we add support for eBPF on PPC32.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/net/Makefile |   2 +-
 arch/powerpc/net/bpf_jit.h|   6 +
 arch/powerpc/net/bpf_jit_comp.c   | 269 ++
 arch/powerpc/net/bpf_jit_comp64.c | 258 +---
 4 files changed, 281 insertions(+), 254 deletions(-)
 create mode 100644 arch/powerpc/net/bpf_jit_comp.c

diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile
index 52c939cef5b2..969cde177880 100644
--- a/arch/powerpc/net/Makefile
+++ b/arch/powerpc/net/Makefile
@@ -2,4 +2,4 @@
 #
 # Arch-specific network modules
 #
-obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o
+obj-$(CONFIG_BPF_JIT) += bpf_jit_comp.o bpf_jit_comp64.o
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index b8fa6908fc5e..b34abfce15a6 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -143,6 +143,12 @@ static inline void bpf_set_seen_register(struct 
codegen_context *ctx, int i)
ctx->seen |= 1 << (31 - i);
 }
 
+void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, u64 
func);
+int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context 
*ctx,
+  u32 *addrs, bool extra_pass);
+void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx);
+void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx);
+
 #endif
 
 #endif
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
new file mode 100644
index ..efac89964873
--- /dev/null
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * eBPF JIT compiler
+ *
+ * Copyright 2016 Naveen N. Rao 
+ *   IBM Corporation
+ *
+ * Based on the powerpc classic BPF JIT compiler by Matt Evans
+ */
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "bpf_jit.h"
+
+static void bpf_jit_fill_ill_insns(void *area, unsigned int size)
+{
+   memset32(area, BREAKPOINT_INSTRUCTION, size / 4);
+}
+
+/* Fix the branch target addresses for subprog calls */
+static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image,
+  struct codegen_context *ctx, u32 *addrs)
+{
+   const struct bpf_insn *insn = fp->insnsi;
+   bool func_addr_fixed;
+   u64 func_addr;
+   u32 tmp_idx;
+   int i, ret;
+
+   for (i = 0; i < fp->len; i++) {
+   /*
+* During the extra pass, only the branch target addresses for
+* the subprog calls need to be fixed. All other instructions
+* can be left untouched.
+*
+* The JITed image length does not change because we already
+* ensure that the JITed instruction sequence for these calls
+* are of fixed length by padding them with NOPs.
+*/
+   if (insn[i].code == (BPF_JMP | BPF_CALL) &&
+   insn[i].src_reg == BPF_PSEUDO_CALL) {
+   ret = bpf_jit_get_func_addr(fp, &insn[i], true,
+   &func_addr,
+   &func_addr_fixed);
+   if (ret < 0)
+   return ret;
+
+   /*
+* Save ctx->idx as this would currently point to the
+* end of the JITed image and set it to the offset of
+* the instruction sequence corresponding to the
+* subprog call temporarily.
+*/
+   tmp_idx = ctx->idx;
+   ctx->idx = addrs[i] / 4;
+   bpf_jit_emit_func_call_rel(image, ctx, func_addr);
+
+   /*
+* Restore ctx->idx here. This is safe as the length
+* of the JITed sequence remains unchanged.
+*/
+   ctx->idx = tmp_idx;
+   }
+   }
+
+   return 0;
+}
+
+struct powerpc64_jit_data {
+   struct bpf_binary_header *header;
+   u32 *addrs;
+   u8 *image;
+   u32 proglen;
+   struct codegen_context ctx;
+};
+
+bool bpf_jit_needs_zext(void)
+{
+   return true;
+}
+
+struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
+{
+   u32 proglen;
+   u32 alloclen;
+   u8 *image = NULL;
+   u32 *code_base;
+   u32 *addrs;
+   struct powerpc64_jit_data *jit_data;
+   struct codegen_context cgctx;
+   int pass;
+   int flen;
+   struct bpf_binary_header *bpf_hdr;
+   struct bpf_prog *org_fp = fp;
+   struct bpf_prog *tmp_fp;
+   bool bpf_blinded = false;
+   bool extra_pass = false;
+
+   if 

[PATCH v2 0/2] Support for H_RPT_INVALIDATE in PowerPC KVM

2020-12-16 Thread Bharata B Rao
This patchset adds support for the new hcall H_RPT_INVALIDATE
and replaces the nested TLB flush calls with this new hcall
when it is supported.

Changes in v2:
-
- The hcall is not enabled by default anymore; userspace can enable it
  when required.
- Added implementation for process-scoped invalidations in the hcall.

v1: 
https://lore.kernel.org/linuxppc-dev/20201019112642.53016-1-bhar...@linux.ibm.com/T/#t

H_RPT_INVALIDATE

Syntax:
int64   /* H_Success: Return code on successful completion */
    /* H_Busy - repeat the call with the same */
    /* H_Parameter, H_P2, H_P3, H_P4, H_P5 : Invalid parameters */
    hcall(const uint64 H_RPT_INVALIDATE, /* Invalidate RPT translation lookaside information */
  uint64 pid,   /* PID/LPID to invalidate */
  uint64 target,    /* Invalidation target */
  uint64 type,  /* Type of lookaside information */
  uint64 pageSizes, /* Page sizes */
  uint64 start, /* Start of Effective Address (EA) range (inclusive) */
  uint64 end)   /* End of EA range (exclusive) */

Invalidation targets (target)
-
Core MMU    0x01 /* All virtual processors in the partition */
Core local MMU  0x02 /* Current virtual processor */
Nest MMU    0x04 /* All nest/accelerator agents in use by the partition */

A combination of the above can be specified, except core and core local.

Type of translation to invalidate (type)
---
NESTED   0x0001  /* Invalidate nested guest partition-scope */
TLB  0x0002  /* Invalidate TLB */
PWC  0x0004  /* Invalidate Page Walk Cache */
PRT  0x0008  /* Invalidate Process Table Entries if NESTED is clear */
PAT  0x0008  /* Invalidate Partition Table Entries if NESTED is set */

A combination of the above can be specified.

Page size mask (pageSizes)
--
4K  0x01
64K 0x02
2M  0x04
1G  0x08
All sizes   (-1UL)

A combination of the above can be specified.
All page sizes can be selected with -1.

Semantics: Invalidate radix tree lookaside information
   matching the parameters given.
* Return H_P2, H_P3 or H_P4 if target, type, or pageSizes parameters are
  different from the defined values.
* Return H_PARAMETER if NESTED is set and pid is not a valid nested
  LPID allocated to this partition
* Return H_P5 if (start, end) doesn't form a valid range. Start and end
  should be valid Quadrant addresses and end > start.
* Return H_NotSupported if the partition is not running in radix
  translation mode.
* May invalidate more translation information than requested.
* If start = 0 and end = -1, set the range to cover all valid addresses.
  Else start and end should be aligned to 4kB (lower 11 bits clear).
* If NESTED is clear, then invalidate process scoped lookaside information.
  Else pid specifies a nested LPID, and the invalidation is performed
  on nested guest partition table and nested guest partition scope real
  addresses.
* If pid = 0 and NESTED is clear, then valid addresses are quadrant 3 and
  quadrant 0 spaces, Else valid addresses are quadrant 0.
* Pages which are fully covered by the range are to be invalidated.
  Those which are partially covered are considered outside invalidation
  range, which allows a caller to optimally invalidate ranges that may
  contain mixed page sizes.
* Return H_SUCCESS on success.
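
As a concrete illustration of the L1-hypervisor usage described above (a
sketch, not part of the patches; it mirrors the pseries_rpt_invalidate() call
used in patch 2/2): drop every radix lookaside entry of a nested LPID, for all
page sizes and the whole address range:

long rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
                                 H_RPTI_TYPE_NESTED | H_RPTI_TYPE_TLB |
                                 H_RPTI_TYPE_PWC | H_RPTI_TYPE_PAT,
                                 H_RPTI_PAGE_ALL, 0, -1UL);
if (rc)
        pr_err("H_RPT_INVALIDATE failed, rc=%ld\n", rc);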

Bharata B Rao (2):
  KVM: PPC: Book3S HV: Add support for H_RPT_INVALIDATE
  KVM: PPC: Book3S HV: Use H_RPT_INVALIDATE in nested KVM

 Documentation/virt/kvm/api.rst|  17 +++
 .../include/asm/book3s/64/tlbflush-radix.h|  18 +++
 arch/powerpc/include/asm/kvm_book3s.h |   3 +
 arch/powerpc/kvm/book3s_64_mmu_radix.c|  27 +++-
 arch/powerpc/kvm/book3s_hv.c  | 121 ++
 arch/powerpc/kvm/book3s_hv_nested.c   | 106 ++-
 arch/powerpc/kvm/powerpc.c|   3 +
 arch/powerpc/mm/book3s64/radix_tlb.c  |   4 -
 include/uapi/linux/kvm.h  |   1 +
 9 files changed, 289 insertions(+), 11 deletions(-)

-- 
2.26.2



[PATCH v2 2/2] KVM: PPC: Book3S HV: Use H_RPT_INVALIDATE in nested KVM

2020-12-16 Thread Bharata B Rao
In the nested KVM case, replace H_TLB_INVALIDATE by the new hcall
H_RPT_INVALIDATE if available. The availability of this hcall
is determined from the "hcall-rpt-invalidate" string in the
ibm,hypertas-functions DT property.

Signed-off-by: Bharata B Rao 
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c | 27 +-
 arch/powerpc/kvm/book3s_hv_nested.c| 12 ++--
 2 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c 
b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index bb35490400e9..7ea5459022cb 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -21,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /*
  * Supported radix tree geometry.
@@ -318,9 +319,19 @@ void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned 
long addr,
}
 
psi = shift_to_mmu_psize(pshift);
-   rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
-   rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 1),
-   lpid, rb);
+
+   if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE)) {
+   rb = addr | (mmu_get_ap(psi) << PPC_BITLSHIFT(58));
+   rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(0, 0, 
1),
+   lpid, rb);
+   } else {
+   rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+   H_RPTI_TYPE_NESTED |
+   H_RPTI_TYPE_TLB,
+   psize_to_rpti_pgsize(psi),
+   addr, addr + psize);
+   }
+
if (rc)
pr_err("KVM: TLB page invalidation hcall failed, rc=%ld\n", rc);
 }
@@ -334,8 +345,14 @@ static void kvmppc_radix_flush_pwc(struct kvm *kvm, 
unsigned int lpid)
return;
}
 
-   rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 1),
-   lpid, TLBIEL_INVAL_SET_LPID);
+   if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
+   rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(1, 0, 
1),
+   lpid, TLBIEL_INVAL_SET_LPID);
+   else
+   rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+   H_RPTI_TYPE_NESTED |
+   H_RPTI_TYPE_PWC, H_RPTI_PAGE_ALL,
+   0, -1UL);
if (rc)
pr_err("KVM: TLB PWC invalidation hcall failed, rc=%ld\n", rc);
 }
diff --git a/arch/powerpc/kvm/book3s_hv_nested.c 
b/arch/powerpc/kvm/book3s_hv_nested.c
index a54ba4b1d4a7..9dc694288757 100644
--- a/arch/powerpc/kvm/book3s_hv_nested.c
+++ b/arch/powerpc/kvm/book3s_hv_nested.c
@@ -19,6 +19,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static struct patb_entry *pseries_partition_tb;
 
@@ -402,8 +403,15 @@ static void kvmhv_flush_lpid(unsigned int lpid)
return;
}
 
-   rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 1),
-   lpid, TLBIEL_INVAL_SET_LPID);
+   if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
+   rc = plpar_hcall_norets(H_TLB_INVALIDATE, H_TLBIE_P1_ENC(2, 0, 
1),
+   lpid, TLBIEL_INVAL_SET_LPID);
+   else
+   rc = pseries_rpt_invalidate(lpid, H_RPTI_TARGET_CMMU,
+   H_RPTI_TYPE_NESTED |
+   H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+   H_RPTI_TYPE_PAT,
+   H_RPTI_PAGE_ALL, 0, -1UL);
if (rc)
pr_err("KVM: TLB LPID invalidation hcall failed, rc=%ld\n", rc);
 }
-- 
2.26.2



[PATCH v2 1/2] KVM: PPC: Book3S HV: Add support for H_RPT_INVALIDATE

2020-12-16 Thread Bharata B Rao
Implement H_RPT_INVALIDATE hcall and add KVM capability
KVM_CAP_PPC_RPT_INVALIDATE to indicate the support for the same.

This hcall does two types of TLB invalidations:

1. Process-scoped invalidations for guests with LPCR[GTSE]=0.
   This is currently not used in KVM as GTSE is not usually
   disabled in KVM.
2. Partition-scoped invalidations that an L1 hypervisor does on
   behalf of an L2 guest. This replaces the uses of the existing
   hcall H_TLB_INVALIDATE.

Signed-off-by: Bharata B Rao 
---
 Documentation/virt/kvm/api.rst|  17 +++
 .../include/asm/book3s/64/tlbflush-radix.h|  18 +++
 arch/powerpc/include/asm/kvm_book3s.h |   3 +
 arch/powerpc/kvm/book3s_hv.c  | 121 ++
 arch/powerpc/kvm/book3s_hv_nested.c   |  94 ++
 arch/powerpc/kvm/powerpc.c|   3 +
 arch/powerpc/mm/book3s64/radix_tlb.c  |   4 -
 include/uapi/linux/kvm.h  |   1 +
 8 files changed, 257 insertions(+), 4 deletions(-)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index e00a66d72372..5ce237c0d707 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -6014,6 +6014,23 @@ KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit 
notifications which user space
 can then handle to implement model specific MSR handling and/or user 
notifications
 to inform a user that an MSR was not handled.
 
+7.22 KVM_CAP_PPC_RPT_INVALIDATE
+--
+
+:Capability: KVM_CAP_PPC_RPT_INVALIDATE
+:Architectures: ppc
+:Type: vm
+
+This capability indicates that the kernel is capable of handling
+H_RPT_INVALIDATE hcall.
+
+In order to enable the use of H_RPT_INVALIDATE in the guest,
+user space might have to advertise it for the guest. For example,
+IBM pSeries (sPAPR) guest starts using it if "hcall-rpt-invalidate" is
+present in the "ibm,hypertas-functions" device-tree property.
+
+This capability is always enabled.
+
 8. Other capabilities.
 ==
 
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h 
b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index 94439e0cefc9..aace7e9b2397 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -4,6 +4,10 @@
 
 #include 
 
+#define RIC_FLUSH_TLB 0
+#define RIC_FLUSH_PWC 1
+#define RIC_FLUSH_ALL 2
+
 struct vm_area_struct;
 struct mm_struct;
 struct mmu_gather;
@@ -21,6 +25,20 @@ static inline u64 psize_to_rpti_pgsize(unsigned long psize)
return H_RPTI_PAGE_ALL;
 }
 
+static inline int rpti_pgsize_to_psize(unsigned long page_size)
+{
+   if (page_size == H_RPTI_PAGE_4K)
+   return MMU_PAGE_4K;
+   if (page_size == H_RPTI_PAGE_64K)
+   return MMU_PAGE_64K;
+   if (page_size == H_RPTI_PAGE_2M)
+   return MMU_PAGE_2M;
+   if (page_size == H_RPTI_PAGE_1G)
+   return MMU_PAGE_1G;
+   else
+   return MMU_PAGE_64K; /* Default */
+}
+
 static inline int mmu_get_ap(int psize)
 {
return mmu_psize_defs[psize].ap;
diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index d32ec9ae73bd..0f1c5fa6e8ce 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -298,6 +298,9 @@ void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 
dw1);
 void kvmhv_release_all_nested(struct kvm *kvm);
 long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu);
 long kvmhv_do_nested_tlbie(struct kvm_vcpu *vcpu);
+long kvmhv_h_rpti_nested(struct kvm_vcpu *vcpu, unsigned long lpid,
+unsigned long type, unsigned long pg_sizes,
+unsigned long start, unsigned long end);
 int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu,
  u64 time_limit, unsigned long lpcr);
 void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index e3b1839fc251..adf2d1191581 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -904,6 +904,118 @@ static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
return yield_count;
 }
 
+static inline void do_tlb_invalidate_all(unsigned long rb, unsigned long rs)
+{
+   asm volatile("ptesync" : : : "memory");
+   asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+   : : "r"(rb), "i"(1), "i"(1), "i"(RIC_FLUSH_ALL), "r"(rs)
+   : "memory");
+   asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+
+static inline void do_tlb_invalidate_pwc(unsigned long rb, unsigned long rs)
+{
+   asm volatile("ptesync" : : : "memory");
+   asm volatile(PPC_TLBIE_5(%0, %4, %3, %2, %1)
+   : : "r"(rb), "i"(1), "i"(1), "i"(RIC_FLUSH_PWC), "r"(rs)
+   : "memory");
+   asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+