[RESEND v4 PATCH 2/2] [PowerPC] Allow use of SIMD in interrupts from kernel code

2019-05-18 Thread Shawn Landden
Like x86, this even allows SIMD in preemptible kernel code, although
that is rarely safe (it could be used with kthread_create_on_cpu()).
All current callers disable preemption.
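For context, the pattern that last sentence refers to looks roughly like
this in process context (a minimal sketch, assuming CONFIG_ALTIVEC;
vec_op() is a hypothetical vectorized routine, the helpers come from
asm/switch_to.h):

  #include <linux/preempt.h>
  #include <asm/switch_to.h>      /* enable/disable_kernel_altivec() */

  void vec_op(void *dst, const void *src);  /* hypothetical VMX routine */

  static void kernel_vec_work(void *dst, const void *src)
  {
          preempt_disable();              /* all current callers do this */
          enable_kernel_altivec();        /* claim the VMX register file */

          vec_op(dst, src);               /* Altivec instructions are safe here */

          disable_kernel_altivec();       /* made out-of-line by this patch */
          preempt_enable();
  }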

v4: fix build without CONFIG_ALTIVEC
change commit message
Signed-off-by: Shawn Landden 
---
 arch/powerpc/include/asm/switch_to.h |  15 +---
 arch/powerpc/kernel/process.c        | 117 +++
 2 files changed, 88 insertions(+), 44 deletions(-)

diff --git a/arch/powerpc/include/asm/switch_to.h b/arch/powerpc/include/asm/switch_to.h
index 5b03d8a82..c79f7d24a 100644
--- a/arch/powerpc/include/asm/switch_to.h
+++ b/arch/powerpc/include/asm/switch_to.h
@@ -30,10 +30,7 @@ extern void enable_kernel_fp(void);
 extern void flush_fp_to_thread(struct task_struct *);
 extern void giveup_fpu(struct task_struct *);
 extern void save_fpu(struct task_struct *);
-static inline void disable_kernel_fp(void)
-{
-   msr_check_and_clear(MSR_FP);
-}
+extern void disable_kernel_fp(void);
 #else
 static inline void save_fpu(struct task_struct *t) { }
 static inline void flush_fp_to_thread(struct task_struct *t) { }
@@ -44,10 +41,7 @@ extern void enable_kernel_altivec(void);
 extern void flush_altivec_to_thread(struct task_struct *);
 extern void giveup_altivec(struct task_struct *);
 extern void save_altivec(struct task_struct *);
-static inline void disable_kernel_altivec(void)
-{
-   msr_check_and_clear(MSR_VEC);
-}
+extern void disable_kernel_altivec(void);
 #else
 static inline void save_altivec(struct task_struct *t) { }
 static inline void __giveup_altivec(struct task_struct *t) { }
@@ -56,10 +50,7 @@ static inline void __giveup_altivec(struct task_struct *t) { }
 #ifdef CONFIG_VSX
 extern void enable_kernel_vsx(void);
 extern void flush_vsx_to_thread(struct task_struct *);
-static inline void disable_kernel_vsx(void)
-{
-   msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
-}
+extern void disable_kernel_vsx(void);
 #endif
 
 #ifdef CONFIG_SPE
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index ef534831f..0136fd132 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -170,6 +170,29 @@ void __msr_check_and_clear(unsigned long bits)
 EXPORT_SYMBOL(__msr_check_and_clear);
 
 #ifdef CONFIG_PPC_FPU
+/*
+ * Track whether the kernel is using the FPU state
+ * currently.
+ *
+ * This flag is used:
+ *
+ *   - by IRQ context code to potentially use the FPU
+ * if it's unused.
+ *
+ *   - to debug kernel_fpu/altivec/vsx_begin()/end() correctness
+ */
+static DEFINE_PER_CPU(bool, in_kernel_fpu);
+
+static bool kernel_fpu_disabled(void)
+{
+   return this_cpu_read(in_kernel_fpu);
+}
+
+static bool interrupted_kernel_fpu_idle(void)
+{
+   return !kernel_fpu_disabled();
+}
+
 static void __giveup_fpu(struct task_struct *tsk)
 {
unsigned long msr;
@@ -230,7 +253,8 @@ void enable_kernel_fp(void)
 {
unsigned long cpumsr;
 
-   WARN_ON(preemptible());
+   WARN_ON_ONCE(this_cpu_read(in_kernel_fpu));
+   this_cpu_write(in_kernel_fpu, true);
 
cpumsr = msr_check_and_set(MSR_FP);
 
@@ -251,6 +275,15 @@ void enable_kernel_fp(void)
 }
 EXPORT_SYMBOL(enable_kernel_fp);
 
+void disable_kernel_fp(void)
+{
+   WARN_ON_ONCE(!this_cpu_read(in_kernel_fpu));
+   this_cpu_write(in_kernel_fpu, false);
+
+   msr_check_and_clear(MSR_FP);
+}
+EXPORT_SYMBOL(disable_kernel_fp);
+
 static int restore_fp(struct task_struct *tsk)
 {
if (tsk->thread.load_fp || tm_active_with_fp(tsk)) {
@@ -260,6 +293,37 @@ static int restore_fp(struct task_struct *tsk)
}
return 0;
 }
+
+/*
+ * Were we in user mode when we were
+ * interrupted?
+ *
+ * Doing kernel_altivec/vsx_begin/end() is ok if we are running
+ * in an interrupt context from user mode - we'll just
+ * save the FPU state as required.
+ */
+static bool interrupted_user_mode(void)
+{
+	struct pt_regs *regs = get_irq_regs();
+
+	return regs && user_mode(regs);
+}
+
+/*
+ * Can we use FPU in kernel mode with the
+ * whole "kernel_fpu/altivec/vsx_begin/end()" sequence?
+ *
+ * It's always ok in process context (ie "not interrupt")
+ * but it is sometimes ok even from an irq.
+ */
+bool may_use_simd(void)
+{
+	return !in_interrupt() ||
+		interrupted_user_mode() ||
+		interrupted_kernel_fpu_idle();
+}
+EXPORT_SYMBOL(may_use_simd);
+
 #else
 static int restore_fp(struct task_struct *tsk) { return 0; }
 #endif /* CONFIG_PPC_FPU */
@@ -295,7 +359,8 @@ void enable_kernel_altivec(void)
 {
unsigned long cpumsr;
 
-   WARN_ON(preemptible());
+   WARN_ON_ONCE(this_cpu_read(in_kernel_fpu));
+   this_cpu_write(in_kernel_fpu, true);
 
cpumsr = msr_check_and_set(MSR_VEC);
 
@@ -316,6 +381,14 @@ void enable_kernel_altivec(void)
 }
 EXPORT_SYMBOL(enable_kernel_altivec);
 
+void disable_kernel_altivec(void)
+{
+   WARN_ON_ONCE(!this_cpu_read(in_kernel_fpu));
+   this_cpu_write(in_kernel_fpu, false);
+  

[RESEND v4 PATCH 1/2] [PowerPC] Add simd.h implementation

2019-05-18 Thread Shawn Landden
Based off the x86 one.

WireGuard really wants to be able to do SIMD in interrupts,
so it can accelerate its in-bound path.
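For illustration, such an in-bound path could gate its vector code on
may_use_simd() and fall back to scalar code otherwise (a hedged sketch;
rx_decrypt(), do_crypt_altivec() and do_crypt_generic() are hypothetical
names, only may_use_simd() and the enable/disable helpers come from
these patches):

  #include <asm/simd.h>           /* may_use_simd(), added by this patch */
  #include <asm/switch_to.h>      /* enable/disable_kernel_altivec() */

  void do_crypt_altivec(void *buf, int len);  /* hypothetical VMX routine */
  void do_crypt_generic(void *buf, int len);  /* hypothetical scalar fallback */

  /* Hypothetical receive-path helper; may run in interrupt context. */
  static void rx_decrypt(void *buf, int len)
  {
          if (may_use_simd()) {
                  enable_kernel_altivec();
                  do_crypt_altivec(buf, len);
                  disable_kernel_altivec();
          } else {
                  do_crypt_generic(buf, len);
          }
  }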

v4: allow using the may_use_simd symbol even when it always
returns false (via include guards)
Signed-off-by: Shawn Landden 
---
 arch/powerpc/include/asm/simd.h | 17 +
 arch/powerpc/kernel/process.c   | 30 ++
 2 files changed, 47 insertions(+)
 create mode 100644 arch/powerpc/include/asm/simd.h

diff --git a/arch/powerpc/include/asm/simd.h b/arch/powerpc/include/asm/simd.h
new file mode 100644
index 0..2fe26f258
--- /dev/null
+++ b/arch/powerpc/include/asm/simd.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+/*
+ * may_use_simd - whether it is allowable at this time to issue SIMD
+ *instructions or access the SIMD register file
+ *
+ * It's always ok in process context (ie "not interrupt")
+ * but it is sometimes ok even from an irq.
+ */
+#ifdef CONFIG_PPC_FPU
+extern bool may_use_simd(void);
+#else
+static inline bool may_use_simd(void)
+{
+   return false;
+}
+#endif
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index dd9e0d538..ef534831f 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -345,6 +345,36 @@ static int restore_altivec(struct task_struct *tsk)
}
return 0;
 }
+
+/*
+ * Were we in user mode when we were
+ * interrupted?
+ *
+ * Doing kernel_altivec/vsx_begin/end() is ok if we are running
+ * in an interrupt context from user mode - we'll just
+ * save the FPU state as required.
+ */
+static bool interrupted_user_mode(void)
+{
+   struct pt_regs *regs = get_irq_regs();
+
+   return regs && user_mode(regs);
+}
+
+/*
+ * Can we use FPU in kernel mode with the
+ * whole "kernel_fpu/altivec/vsx_begin/end()" sequence?
+ *
+ * It's always ok in process context (ie "not interrupt")
+ * but it is sometimes ok even from an irq.
+ */
+bool may_use_simd(void)
+{
+   return !in_interrupt() ||
+   interrupted_user_mode();
+}
+EXPORT_SYMBOL(may_use_simd);
+
 #else
 #define loadvec(thr) 0
 static inline int restore_altivec(struct task_struct *tsk) { return 0; }
-- 
2.21.0.1020.gf2820cf01a



[RFC PATCH v2 10/10] KVM: PPC: Ultravisor: Check for MSR_S during hv_reset_msr

2019-05-18 Thread Claudio Carvalho
From: Michael Anderson 

 - Check for MSR_S so that kvmppc_set_msr will include it. Prior to this
   change, the return to the guest would not have the S bit set.

 - Patch based on comment from Paul Mackerras 

Signed-off-by: Michael Anderson 
Signed-off-by: Claudio Carvalho 
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index be7bc070eae5..dcc1c1fb5f9c 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -295,6 +295,7 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
msr |= MSR_TS_S;
else
msr |= vcpu->arch.shregs.msr & MSR_TS_MASK;
+   msr |= vcpu->arch.shregs.msr & MSR_S;
kvmppc_set_msr(vcpu, msr);
 }
 
-- 
2.20.1



[RFC PATCH v2 09/10] KVM: PPC: Book3S HV: Fixes for running secure guests

2019-05-18 Thread Claudio Carvalho
From: Paul Mackerras 

- Pass SRR1 in r11 for UV_RETURN because SRR0 and SRR1 get set by
  the sc 2 instruction. (Note r3 - r10 potentially have hcall return
  values in them.)

- Fix kvmppc_msr_interrupt to preserve the MSR_S bit.

Signed-off-by: Paul Mackerras 
Signed-off-by: Claudio Carvalho 
---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index d89efa0783a2..1b44c85956b9 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1160,6 +1160,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 ret_to_ultra:
lwz r6, VCPU_CR(r4)
	mtcr	r6
+   mfspr   r11, SPRN_SRR1
LOAD_REG_IMMEDIATE(r0, UV_RETURN)
ld  r7, VCPU_GPR(R7)(r4)
ld  r6, VCPU_GPR(R6)(r4)
@@ -3360,13 +3361,16 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
  *   r0 is used as a scratch register
  */
 kvmppc_msr_interrupt:
+   andis.  r0, r11, MSR_S@h
rldicl  r0, r11, 64 - MSR_TS_S_LG, 62
-   cmpwi   r0, 2 /* Check if we are in transactional state..  */
+   cmpwi   cr1, r0, 2 /* Check if we are in transactional state..  */
ld  r11, VCPU_INTR_MSR(r9)
-   bne 1f
+   bne cr1, 1f
/* ... if transactional, change to suspended */
li  r0, 1
 1: rldimi  r11, r0, MSR_TS_S_LG, 63 - MSR_TS_T_LG
+   beqlr
+	oris	r11, r11, MSR_S@h	/* preserve MSR_S bit setting */
blr
 
 /*
-- 
2.20.1



[RFC PATCH v2 08/10] KVM: PPC: Ultravisor: Return to UV for hcalls from SVM

2019-05-18 Thread Claudio Carvalho
From: Sukadev Bhattiprolu 

All hcalls from a secure VM go to the ultravisor from where they are
reflected into the HV. When we (HV) complete processing such hcalls,
we should return to the UV rather than to the guest kernel.

Have fast_guest_return check the kvm_arch.secure_guest field so that
even a new CPU will enter UV when started (in response to an RTAS
start-cpu call).

Thanks to input from Paul Mackerras, Ram Pai and Mike Anderson.

Signed-off-by: Sukadev Bhattiprolu 
[Fix UV_RETURN token number and arch.secure_guest check]
Signed-off-by: Ram Pai 
Signed-off-by: Claudio Carvalho 
---
 arch/powerpc/include/asm/kvm_host.h   |  1 +
 arch/powerpc/include/asm/ultravisor-api.h |  1 +
 arch/powerpc/kernel/asm-offsets.c |  1 +
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   | 30 ---
 4 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index e6b5bb012ccb..ba7dd35cb916 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -290,6 +290,7 @@ struct kvm_arch {
cpumask_t cpu_in_guest;
u8 radix;
u8 fwnmi_enabled;
+   u8 secure_guest;
bool threads_indep;
bool nested_enable;
pgd_t *pgtable;
diff --git a/arch/powerpc/include/asm/ultravisor-api.h b/arch/powerpc/include/asm/ultravisor-api.h
index 24bfb4c1737e..15e6ce77a131 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -19,5 +19,6 @@
 
 /* opcodes */
 #define UV_WRITE_PATE  0xF104
+#define UV_RETURN  0xF11C
 
 #endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 8e02444e9d3d..44742724513e 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -508,6 +508,7 @@ int main(void)
OFFSET(KVM_VRMA_SLB_V, kvm, arch.vrma_slb_v);
OFFSET(KVM_RADIX, kvm, arch.radix);
OFFSET(KVM_FWNMI, kvm, arch.fwnmi_enabled);
+   OFFSET(KVM_SECURE_GUEST, kvm, arch.secure_guest);
OFFSET(VCPU_DSISR, kvm_vcpu, arch.shregs.dsisr);
OFFSET(VCPU_DAR, kvm_vcpu, arch.shregs.dar);
OFFSET(VCPU_VPA, kvm_vcpu, arch.vpa.pinned_addr);
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 938cfa5dceed..d89efa0783a2 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* Sign-extend HDEC if not on POWER9 */
 #define EXTEND_HDEC(reg)   \
@@ -1112,16 +1113,12 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
ld  r5, VCPU_LR(r4)
-   ld  r6, VCPU_CR(r4)
	mtlr	r5
-	mtcr	r6
 
ld  r1, VCPU_GPR(R1)(r4)
ld  r2, VCPU_GPR(R2)(r4)
ld  r3, VCPU_GPR(R3)(r4)
ld  r5, VCPU_GPR(R5)(r4)
-   ld  r6, VCPU_GPR(R6)(r4)
-   ld  r7, VCPU_GPR(R7)(r4)
ld  r8, VCPU_GPR(R8)(r4)
ld  r9, VCPU_GPR(R9)(r4)
ld  r10, VCPU_GPR(R10)(r4)
@@ -1139,10 +1136,35 @@ BEGIN_FTR_SECTION
mtspr   SPRN_HDSISR, r0
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 
+   ld  r6, VCPU_KVM(r4)
+   lbz r7, KVM_SECURE_GUEST(r6)
+   cmpdi   r7, 0
+   bne ret_to_ultra
+
+   lwz r6, VCPU_CR(r4)
+	mtcr	r6
+
+   ld  r7, VCPU_GPR(R7)(r4)
+   ld  r6, VCPU_GPR(R6)(r4)
ld  r0, VCPU_GPR(R0)(r4)
ld  r4, VCPU_GPR(R4)(r4)
HRFI_TO_GUEST
b   .
+/*
+ * The hcall we just completed was from Ultravisor. Use UV_RETURN
+ * ultra call to return to the Ultravisor. Results from the hcall
+ * are already in the appropriate registers (r3:12), except for
+ * R6,7 which we used as temporary registers above. Restore them,
+ * and set R0 to the ucall number (UV_RETURN).
+ */
+ret_to_ultra:
+   lwz r6, VCPU_CR(r4)
+	mtcr	r6
+   LOAD_REG_IMMEDIATE(r0, UV_RETURN)
+   ld  r7, VCPU_GPR(R7)(r4)
+   ld  r6, VCPU_GPR(R6)(r4)
+   ld  r4, VCPU_GPR(R4)(r4)
+   sc  2
 
 /*
  * Enter the guest on a P9 or later system where we have exactly
-- 
2.20.1



[RFC PATCH v2 07/10] KVM: PPC: Ultravisor: Restrict LDBAR access

2019-05-18 Thread Claudio Carvalho
From: Ram Pai 

When the ultravisor firmware is available, it takes control over the
LDBAR register. In this case, thread-imc updates and save/restore
operations on the LDBAR register are handled by the ultravisor.

Signed-off-by: Ram Pai 
[Restrict LDBAR access in assembly code and some in C, update the commit
 message]
Signed-off-by: Claudio Carvalho 
---
 arch/powerpc/kvm/book3s_hv.c |  4 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S  |  2 +
 arch/powerpc/perf/imc-pmu.c  | 64 
 arch/powerpc/platforms/powernv/idle.c|  6 +-
 arch/powerpc/platforms/powernv/subcore-asm.S |  4 ++
 5 files changed, 52 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 0fab0a201027..81f35f955d16 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -75,6 +75,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "book3s.h"
 
@@ -3117,7 +3118,8 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
subcore_size = MAX_SMT_THREADS / split;
split_info.rpr = mfspr(SPRN_RPR);
split_info.pmmar = mfspr(SPRN_PMMAR);
-   split_info.ldbar = mfspr(SPRN_LDBAR);
+   if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
+   split_info.ldbar = mfspr(SPRN_LDBAR);
split_info.subcore_size = subcore_size;
} else {
split_info.subcore_size = 1;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index dd014308f065..938cfa5dceed 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -375,8 +375,10 @@ BEGIN_FTR_SECTION
mtspr   SPRN_RPR, r0
ld  r0, KVM_SPLIT_PMMAR(r6)
mtspr   SPRN_PMMAR, r0
+BEGIN_FW_FTR_SECTION_NESTED(70)
ld  r0, KVM_SPLIT_LDBAR(r6)
mtspr   SPRN_LDBAR, r0
+END_FW_FTR_SECTION_NESTED(FW_FEATURE_ULTRAVISOR, 0, 70)
isync
 FTR_SECTION_ELSE
/* On P9 we use the split_info for coordinating LPCR changes */
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 31fa753e2eb2..39c84de74da9 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -17,6 +17,7 @@
 #include 
 #include 
 #include 
+#include 
 
 /* Nest IMC data structures and variables */
 
@@ -816,6 +817,17 @@ static int core_imc_event_init(struct perf_event *event)
return 0;
 }
 
+static void thread_imc_ldbar_disable(void *dummy)
+{
+   /*
+	 * By zeroing LDBAR, we disable thread-imc updates. When the ultravisor
+	 * firmware is available, however, it is responsible for handling
+	 * thread-imc updates.
+*/
+   if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
+   mtspr(SPRN_LDBAR, 0);
+}
+
 /*
  * Allocates a page of memory for each of the online cpus, and load
  * LDBAR with 0.
@@ -856,7 +868,7 @@ static int thread_imc_mem_alloc(int cpu_id, int size)
per_cpu(thread_imc_mem, cpu_id) = local_mem;
}
 
-   mtspr(SPRN_LDBAR, 0);
+   thread_imc_ldbar_disable(NULL);
return 0;
 }
 
@@ -867,7 +879,7 @@ static int ppc_thread_imc_cpu_online(unsigned int cpu)
 
 static int ppc_thread_imc_cpu_offline(unsigned int cpu)
 {
-   mtspr(SPRN_LDBAR, 0);
+   thread_imc_ldbar_disable(NULL);
return 0;
 }
 
@@ -1010,7 +1022,6 @@ static int thread_imc_event_add(struct perf_event *event, int flags)
 {
int core_id;
struct imc_pmu_ref *ref;
-	u64 ldbar_value, *local_mem = per_cpu(thread_imc_mem, smp_processor_id());
 
if (flags & PERF_EF_START)
imc_event_start(event, flags);
@@ -1019,8 +1030,14 @@ static int thread_imc_event_add(struct perf_event *event, int flags)
return -EINVAL;
 
core_id = smp_processor_id() / threads_per_core;
-	ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | THREAD_IMC_ENABLE;
-   mtspr(SPRN_LDBAR, ldbar_value);
+   if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR)) {
+   u64 ldbar_value, *local_mem;
+
+   local_mem = per_cpu(thread_imc_mem, smp_processor_id());
+   ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) |
+   THREAD_IMC_ENABLE;
+   mtspr(SPRN_LDBAR, ldbar_value);
+   }
 
/*
 * imc pmus are enabled only when it is used.
@@ -1053,7 +1070,7 @@ static void thread_imc_event_del(struct perf_event *event, int flags)
int core_id;
struct imc_pmu_ref *ref;
 
-   mtspr(SPRN_LDBAR, 0);
+   thread_imc_ldbar_disable(NULL);
 
core_id = smp_processor_id() / threads_per_core;
	ref = &core_imc_refc[core_id];
@@ -1109,7 +1126,7 @@ static int trace_imc_mem_alloc(int cpu_id, int size)

[RFC PATCH v2 06/10] KVM: PPC: Ultravisor: Restrict flush of the partition tlb cache

2019-05-18 Thread Claudio Carvalho
From: Ram Pai 

The ultravisor is responsible for flushing the TLB cache, since it
manages the PATE entries. Hence, skip the TLB flush if the ultravisor
firmware is available.

Signed-off-by: Ram Pai 
Signed-off-by: Claudio Carvalho 
---
 arch/powerpc/mm/book3s64/pgtable.c | 33 +-
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 40a9fc8b139f..1eeb5fe87023 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -224,6 +224,23 @@ void __init mmu_partition_table_init(void)
powernv_set_nmmu_ptcr(ptcr);
 }
 
+static void flush_partition(unsigned int lpid, unsigned long dw0)
+{
+   if (dw0 & PATB_HR) {
+   asm volatile(PPC_TLBIE_5(%0, %1, 2, 0, 1) : :
+"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+   asm volatile(PPC_TLBIE_5(%0, %1, 2, 1, 1) : :
+"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+   trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
+   } else {
+   asm volatile(PPC_TLBIE_5(%0, %1, 2, 0, 0) : :
+"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+   trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0);
+   }
+   /* do we need fixup here ?*/
+   asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+
 static void __mmu_partition_table_set_entry(unsigned int lpid,
unsigned long dw0,
unsigned long dw1)
@@ -238,20 +255,8 @@ static void __mmu_partition_table_set_entry(unsigned int lpid,
 * The type of flush (hash or radix) depends on what the previous
 * use of this partition ID was, not the new use.
 */
-   asm volatile("ptesync" : : : "memory");
-   if (old & PATB_HR) {
-   asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : :
-"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
-   asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : :
-"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
-   trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1);
-   } else {
-   asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
-"r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
-   trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0);
-   }
-   /* do we need fixup here ?*/
-   asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+   if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
+   flush_partition(lpid, old);
 }
 
 void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
-- 
2.20.1



[RFC PATCH v2 05/10] KVM: PPC: Ultravisor: Use UV_WRITE_PATE ucall to register a PATE

2019-05-18 Thread Claudio Carvalho
From: Michael Anderson 

When running under an ultravisor, the ultravisor controls the real
partition table and has it in secure memory where the hypervisor can't
access it, and therefore we (the HV) have to do a ucall whenever we want
to update an entry.

The HV still keeps a copy of its view of the partition table in normal
memory so that the nest MMU can access it.

Both partition tables will have PATE entries for HV and normal virtual
machines.

Suggested-by: Ryan Grimm 
Signed-off-by: Michael Anderson 
Signed-off-by: Madhavan Srinivasan 
Signed-off-by: Ram Pai 
[Write the pate in HV's table before doing that in UV's]
Signed-off-by: Claudio Carvalho 
---
 arch/powerpc/include/asm/ultravisor-api.h |  5 +++-
 arch/powerpc/include/asm/ultravisor.h |  9 ++
 arch/powerpc/mm/book3s64/hash_utils.c |  3 +-
 arch/powerpc/mm/book3s64/pgtable.c| 34 +--
 arch/powerpc/mm/book3s64/radix_pgtable.c  |  9 --
 5 files changed, 52 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/include/asm/ultravisor-api.h b/arch/powerpc/include/asm/ultravisor-api.h
index 5f538f33c704..24bfb4c1737e 100644
--- a/arch/powerpc/include/asm/ultravisor-api.h
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -15,6 +15,9 @@
 #define U_SUCCESS  H_SUCCESS
 #define U_FUNCTION H_FUNCTION
 #define U_PARAMETER	H_PARAMETER
+#define U_PERMISSION   H_PERMISSION
 
-#endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
+/* opcodes */
+#define UV_WRITE_PATE  0xF104
 
+#endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
diff --git a/arch/powerpc/include/asm/ultravisor.h b/arch/powerpc/include/asm/ultravisor.h
index e8abc1bbc194..4ffec7a36acd 100644
--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -12,6 +12,8 @@
 
 #if !defined(__ASSEMBLY__)
 
+#include 
+
 /* Internal functions */
 extern int early_init_dt_scan_ultravisor(unsigned long node, const char *uname,
 int depth, void *data);
@@ -35,6 +37,13 @@ static long ucall(unsigned long opcode, unsigned long *retbuf, ...)
 }
 #endif
 
+static inline int uv_register_pate(u64 lpid, u64 dw0, u64 dw1)
+{
+   unsigned long retbuf[UCALL_BUFSIZE];
+
+   return ucall(UV_WRITE_PATE, retbuf, lpid, dw0, dw1);
+}
+
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_ULTRAVISOR_H */
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 919a861a8ec0..8419e665fab0 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1080,9 +1080,10 @@ void hash__early_init_mmu_secondary(void)
 
if (!cpu_has_feature(CPU_FTR_ARCH_300))
mtspr(SPRN_SDR1, _SDR1);
-   else
+   else if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
mtspr(SPRN_PTCR,
  __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+
}
/* Initialize SLB */
slb_initialize();
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index 16bda049187a..40a9fc8b139f 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -16,6 +16,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include 
 #include 
@@ -206,12 +208,25 @@ void __init mmu_partition_table_init(void)
 * 64 K size.
 */
ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12);
-   mtspr(SPRN_PTCR, ptcr);
+   /*
+	 * If the ultravisor is available, it is responsible for creating
+	 * and managing the partition table.
+*/
+   if (!firmware_has_feature(FW_FEATURE_ULTRAVISOR))
+   mtspr(SPRN_PTCR, ptcr);
+
+   /*
+	 * Since the nest MMU cannot access secure memory, create
+	 * and manage our own partition table. This table
+	 * contains entries for non-secure and hypervisor
+	 * partitions.
+*/
powernv_set_nmmu_ptcr(ptcr);
 }
 
-void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
-  unsigned long dw1)
+static void __mmu_partition_table_set_entry(unsigned int lpid,
+   unsigned long dw0,
+   unsigned long dw1)
 {
unsigned long old = be64_to_cpu(partition_tb[lpid].patb0);
 
@@ -238,6 +253,19 @@ void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
/* do we need fixup here ?*/
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
 }
+
+void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
+ unsigned long dw1)
+{
+   __mmu_partition_table_set_entry(lpid, dw0, dw1);
+
+   if (firmware_has_feature(FW_FEATURE_ULTRAVISOR)) {
+   uv_register_pate(lpid, dw0, dw1);
+   pr_info("PATE registered by ultravisor: 

[RFC PATCH v2 03/10] powerpc: Introduce FW_FEATURE_ULTRAVISOR

2019-05-18 Thread Claudio Carvalho
This feature bit indicates whether the ultravisor firmware is available
to handle ucalls.
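For illustration, later patches in this series guard ultravisor-owned
resources with the standard feature test; a sketch modelled on the
PTCR/LDBAR patches (the wrapper function itself is illustrative):

  #include <asm/firmware.h>
  #include <asm/reg.h>

  static void set_ptcr_if_owned(unsigned long ptcr)
  {
          /*
           * When ultravisor firmware is present it owns the real
           * partition table, so the hypervisor must not write PTCR.
           */
          if (firmware_has_feature(FW_FEATURE_ULTRAVISOR))
                  return;

          mtspr(SPRN_PTCR, ptcr);
  }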

Signed-off-by: Claudio Carvalho 
[Device node name to "ibm,ultravisor"]
Signed-off-by: Michael Anderson 
---
 arch/powerpc/include/asm/firmware.h   |  5 +++--
 arch/powerpc/include/asm/ultravisor.h | 15 +++
 arch/powerpc/kernel/Makefile  |  1 +
 arch/powerpc/kernel/prom.c|  6 ++
 arch/powerpc/kernel/ultravisor.c  | 26 ++
 5 files changed, 51 insertions(+), 2 deletions(-)
 create mode 100644 arch/powerpc/include/asm/ultravisor.h
 create mode 100644 arch/powerpc/kernel/ultravisor.c

diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 00bc42d95679..43b48c4d3ca9 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -54,6 +54,7 @@
 #define FW_FEATURE_DRC_INFO	ASM_CONST(0x0008)
 #define FW_FEATURE_BLOCK_REMOVE ASM_CONST(0x0010)
 #define FW_FEATURE_PAPR_SCM	ASM_CONST(0x0020)
+#define FW_FEATURE_ULTRAVISOR  ASM_CONST(0x0040)
 
 #ifndef __ASSEMBLY__
 
@@ -72,9 +73,9 @@ enum {
FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 |
FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE |
-   FW_FEATURE_PAPR_SCM,
+   FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR,
FW_FEATURE_PSERIES_ALWAYS = 0,
-   FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
+   FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_ULTRAVISOR,
FW_FEATURE_POWERNV_ALWAYS = 0,
FW_FEATURE_PS3_POSSIBLE = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
FW_FEATURE_PS3_ALWAYS = FW_FEATURE_LPAR | FW_FEATURE_PS3_LV1,
diff --git a/arch/powerpc/include/asm/ultravisor.h b/arch/powerpc/include/asm/ultravisor.h
new file mode 100644
index ..e5009b0d84ea
--- /dev/null
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Ultravisor definitions
+ *
+ * Copyright 2019, IBM Corporation.
+ *
+ */
+#ifndef _ASM_POWERPC_ULTRAVISOR_H
+#define _ASM_POWERPC_ULTRAVISOR_H
+
+/* Internal functions */
+extern int early_init_dt_scan_ultravisor(unsigned long node, const char *uname,
+int depth, void *data);
+
+#endif /* _ASM_POWERPC_ULTRAVISOR_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 0ea6c4aa3a20..c8ca219e54bf 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -154,6 +154,7 @@ endif
 
 obj-$(CONFIG_EPAPR_PARAVIRT)   += epapr_paravirt.o epapr_hcalls.o
 obj-$(CONFIG_KVM_GUEST)+= kvm.o kvm_emul.o
+obj-$(CONFIG_PPC_UV)   += ultravisor.o
 
 # Disable GCOV, KCOV & sanitizers in odd or sensitive code
 GCOV_PROFILE_prom_init.o := n
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 4221527b082f..8a9a8a319959 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -59,6 +59,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
@@ -713,6 +714,11 @@ void __init early_init_devtree(void *params)
of_scan_flat_dt(early_init_dt_scan_fw_dump, NULL);
 #endif
 
+#if defined(CONFIG_PPC_UV)
+   /* Scan tree for ultravisor feature */
+   of_scan_flat_dt(early_init_dt_scan_ultravisor, NULL);
+#endif
+
/* Retrieve various informations from the /chosen node of the
 * device-tree, including the platform type, initrd location and
 * size, TCE reserve, and more ...
diff --git a/arch/powerpc/kernel/ultravisor.c b/arch/powerpc/kernel/ultravisor.c
new file mode 100644
index ..ac23835bdf5a
--- /dev/null
+++ b/arch/powerpc/kernel/ultravisor.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Ultravisor high level interfaces
+ *
+ * Copyright 2019, IBM Corporation.
+ *
+ */
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+int __init early_init_dt_scan_ultravisor(unsigned long node, const char *uname,
+int depth, void *data)
+{
+   if (depth != 1 || strcmp(uname, "ibm,ultravisor") != 0)
+   return 0;
+
+   /* TODO: check the compatible devtree property once it is created */
+
+   powerpc_firmware_features |= FW_FEATURE_ULTRAVISOR;
+   pr_debug("Ultravisor detected!\n");
+   return 1;
+}
-- 
2.20.1



[RFC PATCH v2 04/10] KVM: PPC: Ultravisor: Add generic ultravisor call handler

2019-05-18 Thread Claudio Carvalho
From: Ram Pai 

Add the ucall() function, which can be used to make ultravisor calls
with a varied number of input and output arguments. Ultravisor calls
can be made from the host or from guests.

This copies the implementation of plpar_hcall().
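As a usage example, patch 05/10 of this series wraps ucall() exactly
this way to register a partition-table entry (reproduced here as a
sketch):

  #include <asm/ultravisor.h>      /* ucall(), UCALL_BUFSIZE */
  #include <asm/ultravisor-api.h>  /* UV_WRITE_PATE */

  static inline int uv_register_pate(u64 lpid, u64 dw0, u64 dw1)
  {
          /* Up to four output arguments come back through retbuf. */
          unsigned long retbuf[UCALL_BUFSIZE];

          return ucall(UV_WRITE_PATE, retbuf, lpid, dw0, dw1);
  }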

Signed-off-by: Ram Pai 
[Change ucall.S to not save CR, rename and move the headers, build
 ucall.S if CONFIG_PPC_UV set, and add some comments in the code]
Signed-off-by: Claudio Carvalho 
---
 arch/powerpc/include/asm/ultravisor-api.h | 20 +++
 arch/powerpc/include/asm/ultravisor.h | 25 ++
 arch/powerpc/kernel/Makefile  |  2 +-
 arch/powerpc/kernel/ucall.S   | 31 +++
 arch/powerpc/kernel/ultravisor.c  |  4 +++
 5 files changed, 81 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/include/asm/ultravisor-api.h
 create mode 100644 arch/powerpc/kernel/ucall.S

diff --git a/arch/powerpc/include/asm/ultravisor-api.h b/arch/powerpc/include/asm/ultravisor-api.h
new file mode 100644
index ..5f538f33c704
--- /dev/null
+++ b/arch/powerpc/include/asm/ultravisor-api.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Ultravisor calls.
+ *
+ * Copyright 2019, IBM Corporation.
+ *
+ */
+#ifndef _ASM_POWERPC_ULTRAVISOR_API_H
+#define _ASM_POWERPC_ULTRAVISOR_API_H
+
+#include 
+
+/* Return codes */
+#define U_NOT_AVAILABLE	H_NOT_AVAILABLE
+#define U_SUCCESS	H_SUCCESS
+#define U_FUNCTION	H_FUNCTION
+#define U_PARAMETER	H_PARAMETER
+
+#endif /* _ASM_POWERPC_ULTRAVISOR_API_H */
+
diff --git a/arch/powerpc/include/asm/ultravisor.h b/arch/powerpc/include/asm/ultravisor.h
index e5009b0d84ea..e8abc1bbc194 100644
--- a/arch/powerpc/include/asm/ultravisor.h
+++ b/arch/powerpc/include/asm/ultravisor.h
@@ -8,8 +8,33 @@
 #ifndef _ASM_POWERPC_ULTRAVISOR_H
 #define _ASM_POWERPC_ULTRAVISOR_H
 
+#include 
+
+#if !defined(__ASSEMBLY__)
+
 /* Internal functions */
 extern int early_init_dt_scan_ultravisor(unsigned long node, const char *uname,
 int depth, void *data);
 
+/* API functions */
+#define UCALL_BUFSIZE 4
+/**
+ * ucall: Make a powerpc ultravisor call.
+ * @opcode: The ultravisor call to make.
+ * @retbuf: Buffer to store up to 4 return arguments in.
+ *
+ * This call supports up to 6 arguments and 4 return arguments. Use
+ * UCALL_BUFSIZE to size the return argument buffer.
+ */
+#if defined(CONFIG_PPC_UV)
+long ucall(unsigned long opcode, unsigned long *retbuf, ...);
+#else
+static long ucall(unsigned long opcode, unsigned long *retbuf, ...)
+{
+   return U_NOT_AVAILABLE;
+}
+#endif
+
+#endif /* !__ASSEMBLY__ */
+
 #endif /* _ASM_POWERPC_ULTRAVISOR_H */
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index c8ca219e54bf..43ff4546e469 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -154,7 +154,7 @@ endif
 
 obj-$(CONFIG_EPAPR_PARAVIRT)   += epapr_paravirt.o epapr_hcalls.o
 obj-$(CONFIG_KVM_GUEST)+= kvm.o kvm_emul.o
-obj-$(CONFIG_PPC_UV)   += ultravisor.o
+obj-$(CONFIG_PPC_UV)   += ultravisor.o ucall.o
 
 # Disable GCOV, KCOV & sanitizers in odd or sensitive code
 GCOV_PROFILE_prom_init.o := n
diff --git a/arch/powerpc/kernel/ucall.S b/arch/powerpc/kernel/ucall.S
new file mode 100644
index ..ecc88998a13b
--- /dev/null
+++ b/arch/powerpc/kernel/ucall.S
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Generic code to perform an ultravisor call.
+ *
+ * Copyright 2019, IBM Corporation.
+ *
+ */
+#include 
+
+/*
+ * This function is based on the plpar_hcall()
+ */
+_GLOBAL_TOC(ucall)
+   mr  r0,r3
+   std r4,STK_PARAM(R4)(r1) /* Save ret buffer */
+   mr  r3,r5
+   mr  r4,r6
+   mr  r5,r7
+   mr  r6,r8
+   mr  r7,r9
+   mr  r8,r10
+
+	sc	2	/* invoke the ultravisor */
+
+   ld  r12,STK_PARAM(R4)(r1)
+   std r4,  0(r12)
+   std r5,  8(r12)
+   std r6, 16(r12)
+   std r7, 24(r12)
+
+   blr /* return r3 = status */
diff --git a/arch/powerpc/kernel/ultravisor.c b/arch/powerpc/kernel/ultravisor.c
index ac23835bdf5a..9fbf0804ee4e 100644
--- a/arch/powerpc/kernel/ultravisor.c
+++ b/arch/powerpc/kernel/ultravisor.c
@@ -8,10 +8,14 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
 
+/* in ucall.S */
+EXPORT_SYMBOL_GPL(ucall);
+
 int __init early_init_dt_scan_ultravisor(unsigned long node, const char *uname,
 int depth, void *data)
 {
-- 
2.20.1



[RFC PATCH v2 02/10] KVM: PPC: Ultravisor: Introduce the MSR_S bit

2019-05-18 Thread Claudio Carvalho
From: Sukadev Bhattiprolu 

The ultravisor processor mode is introduced in POWER platforms that
support the Protected Execution Facility (PEF). Ultravisor mode is more
privileged than hypervisor mode.

In PEF enabled platforms, the MSR_S bit is used to indicate if the
thread is in secure state. With the MSR_S bit, the privilege state of
the thread is now determined by MSR_S, MSR_HV and MSR_PR, as follows:

S   HV  PR
-----------------------
0   x   1   problem
1   0   1   problem
x   x   0   privileged
x   1   0   hypervisor
1   1   0   ultravisor
1   1   1   reserved

The hypervisor doesn't (and can't) run with the MSR_S bit set, but a
secure guest and the ultravisor firmware do.
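As a worked example, the table reads as the following classification
(an illustrative sketch, not kernel code; the enum and helper names are
hypothetical):

  #include <stdbool.h>

  enum priv_state { PROBLEM, PRIVILEGED, HYPERVISOR, ULTRAVISOR, RESERVED };

  /* Classify a thread per the S/HV/PR table above. */
  static enum priv_state classify(bool s, bool hv, bool pr)
  {
          if (pr)                         /* PR=1 rows */
                  return (s && hv) ? RESERVED : PROBLEM;
          if (s && hv)                    /* 1 1 0 */
                  return ULTRAVISOR;
          if (hv)                         /* x 1 0 */
                  return HYPERVISOR;
          return PRIVILEGED;              /* x x 0 */
  }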

Signed-off-by: Sukadev Bhattiprolu 
Signed-off-by: Ram Pai 
[Update the commit message]
Signed-off-by: Claudio Carvalho 
---
 arch/powerpc/include/asm/reg.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 10caa145f98b..39b4c0a519f5 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -38,6 +38,7 @@
 #define MSR_TM_LG  32  /* Trans Mem Available */
 #define MSR_VEC_LG 25  /* Enable AltiVec */
 #define MSR_VSX_LG 23  /* Enable VSX */
+#define MSR_S_LG   22  /* Secure VM bit */
 #define MSR_POW_LG 18  /* Enable Power Management */
 #define MSR_WE_LG  18  /* Wait State Enable */
 #define MSR_TGPR_LG	17	/* TLB Update registers in use */
@@ -71,11 +72,13 @@
 #define MSR_SF __MASK(MSR_SF_LG)   /* Enable 64 bit mode */
 #define MSR_ISF	__MASK(MSR_ISF_LG)	/* Interrupt 64b mode valid on 630 */
 #define MSR_HV	__MASK(MSR_HV_LG)	/* Hypervisor state */
+#define MSR_S	__MASK(MSR_S_LG)	/* Secure state */
 #else
 /* so tests for these bits fail on 32-bit */
 #define MSR_SF 0
 #define MSR_ISF0
 #define MSR_HV 0
+#define MSR_S  0
 #endif
 
 /*
-- 
2.20.1



[RFC PATCH v2 01/10] KVM: PPC: Ultravisor: Add PPC_UV config option

2019-05-18 Thread Claudio Carvalho
From: Anshuman Khandual 

CONFIG_PPC_UV adds support for ultravisor.

Signed-off-by: Anshuman Khandual 
Signed-off-by: Bharata B Rao 
Signed-off-by: Ram Pai 
[Update config help and commit message]
Signed-off-by: Claudio Carvalho 
---
 arch/powerpc/Kconfig | 20 
 1 file changed, 20 insertions(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2711aac24621..894171c863bc 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -438,6 +438,26 @@ config PPC_TRANSACTIONAL_MEM
---help---
  Support user-mode Transactional Memory on POWERPC.
 
+config PPC_UV
+   bool "Ultravisor support"
+   depends on KVM_BOOK3S_HV_POSSIBLE
+   select HMM_MIRROR
+   select HMM
+   select ZONE_DEVICE
+   select MIGRATE_VMA_HELPER
+   select DEV_PAGEMAP_OPS
+   select DEVICE_PRIVATE
+   select MEMORY_HOTPLUG
+   select MEMORY_HOTREMOVE
+   default n
+   help
+	  This option paravirtualizes the kernel to run in POWER platforms that
+	  support the Protected Execution Facility (PEF). In such platforms,
+ the ultravisor firmware runs at a privilege level above the
+ hypervisor.
+
+ If unsure, say "N".
+
 config LD_HEAD_STUB_CATCH
bool "Reserve 256 bytes to cope with linker stubs in HEAD text" if 
EXPERT
depends on PPC64
-- 
2.20.1



[RFC PATCH v2 00/10] kvmppc: Paravirtualize KVM to support ultravisor

2019-05-18 Thread Claudio Carvalho
POWER platforms that support the Protected Execution Facility (PEF)
introduce features that combine hardware facilities and firmware to
enable secure virtual machines. That includes a new processor mode
(ultravisor mode) and the ultravisor firmware.

In PEF enabled systems, the ultravisor firmware runs at a privilege
level above the hypervisor and also takes control over some system
resources. The hypervisor, though, can make system calls to access these
resources. Such system calls, a.k.a. ucalls, are handled by the
ultravisor firmware.

The processor allows part of the system memory to be configured as
secure memory, and introduces a new mode, called secure mode, where
any software entity in that mode can access secure memory. The
hypervisor doesn't (and can't) run in secure mode, but a secure guest
and the ultravisor firmware do.

This patch set adds support for ultravisor calls and does some
preparation for running secure guests.

---
Changelog:
---
v1->v2:
 - Addressed comments from Paul Mackerras:
 - Write the pate in HV's table before doing that in UV's
 - Renamed and better documented the ultravisor header files. Also added
   all possible return codes for each ucall
 - Updated the commit message that introduces the MSR_S bit
 - Moved ultravisor.c and ucall.S to arch/powerpc/kernel
 - Changed ucall.S to not save CR
 - Rebased
 - Changed the patches order
 - Updated several commit messages
 - Added FW_FEATURE_ULTRAVISOR to enable use of firmware_has_feature()
 - Renamed CONFIG_PPC_KVM_UV to CONFIG_PPC_UV and used it to ifdef the ucall
   handler and the code that populates the powerpc_firmware_features for 
   ultravisor
 - Exported the ucall symbol. KVM may be built as module.
 - Restricted LDBAR access if the ultravisor firmware is available
 - Dropped patches:
 "[PATCH 06/13] KVM: PPC: Ultravisor: UV_RESTRICTED_SPR_WRITE ucall"
 "[PATCH 07/13] KVM: PPC: Ultravisor: UV_RESTRICTED_SPR_READ ucall"
 "[PATCH 08/13] KVM: PPC: Ultravisor: fix mtspr and mfspr"
 - Squashed patches:
 "[PATCH 09/13] KVM: PPC: Ultravisor: Return to UV for hcalls from SVM"
 "[PATCH 13/13] KVM: PPC: UV: Have fast_guest_return check secure_guest"

Anshuman Khandual (1):
  KVM: PPC: Ultravisor: Add PPC_UV config option

Claudio Carvalho (1):
  powerpc: Introduce FW_FEATURE_ULTRAVISOR

Michael Anderson (2):
  KVM: PPC: Ultravisor: Use UV_WRITE_PATE ucall to register a PATE
  KVM: PPC: Ultravisor: Check for MSR_S during hv_reset_msr

Paul Mackerras (1):
  KVM: PPC: Book3S HV: Fixes for running secure guests

Ram Pai (3):
  KVM: PPC: Ultravisor: Add generic ultravisor call handler
  KVM: PPC: Ultravisor: Restrict flush of the partition tlb cache
  KVM: PPC: Ultravisor: Restrict LDBAR access

Sukadev Bhattiprolu (2):
  KVM: PPC: Ultravisor: Introduce the MSR_S bit
  KVM: PPC: Ultravisor: Return to UV for hcalls from SVM

 arch/powerpc/Kconfig | 20 ++
 arch/powerpc/include/asm/firmware.h  |  5 +-
 arch/powerpc/include/asm/kvm_host.h  |  1 +
 arch/powerpc/include/asm/reg.h   |  3 +
 arch/powerpc/include/asm/ultravisor-api.h| 24 
 arch/powerpc/include/asm/ultravisor.h| 49 +++
 arch/powerpc/kernel/Makefile |  1 +
 arch/powerpc/kernel/asm-offsets.c|  1 +
 arch/powerpc/kernel/prom.c   |  6 ++
 arch/powerpc/kernel/ucall.S  | 31 ++
 arch/powerpc/kernel/ultravisor.c | 30 +
 arch/powerpc/kvm/book3s_64_mmu_hv.c  |  1 +
 arch/powerpc/kvm/book3s_hv.c |  4 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S  | 40 ++--
 arch/powerpc/mm/book3s64/hash_utils.c|  3 +-
 arch/powerpc/mm/book3s64/pgtable.c   | 65 +++-
 arch/powerpc/mm/book3s64/radix_pgtable.c |  9 ++-
 arch/powerpc/perf/imc-pmu.c  | 64 +++
 arch/powerpc/platforms/powernv/idle.c|  6 +-
 arch/powerpc/platforms/powernv/subcore-asm.S |  4 ++
 20 files changed, 311 insertions(+), 56 deletions(-)
 create mode 100644 arch/powerpc/include/asm/ultravisor-api.h
 create mode 100644 arch/powerpc/include/asm/ultravisor.h
 create mode 100644 arch/powerpc/kernel/ucall.S
 create mode 100644 arch/powerpc/kernel/ultravisor.c

-- 
2.20.1



Re: PROBLEM: Power9: kernel oops on memory hotunplug from ppc64le guest

2019-05-18 Thread Bharata B Rao
On Thu, May 16, 2019 at 07:44:20PM +0530, srikanth wrote:
> Hello,
> 
> On a power9 host, performing memory hotunplug from a ppc64le guest results
> in a kernel oops.
> 
> Kernel used : https://github.com/torvalds/linux/tree/v5.1 built using
> ppc64le_defconfig for host and ppc64le_guest_defconfig for guest.
> 
> Recreation steps:
> 
> 1. Boot a guest with below mem configuration:
>   33554432
>   8388608
>   4194304
>   
>     
>   
>     
>   
> 
> 2. From host hotplug 8G memory -> verify memory hotadded successfully -> now
> reboot guest -> once guest comes back try to unplug 8G memory
> 
> mem.xml used:
> 
> 
> 8
> 0
> 
> 
> 
> Memory attach and detach commands used:
>     virsh attach-device vm1 ./mem.xml --live
>     virsh detach-device vm1 ./mem.xml --live
> 
> Trace seen inside guest after unplug, guest just hangs there forever:
> 
> [   21.962986] kernel BUG at arch/powerpc/mm/pgtable-frag.c:113!
> [   21.963064] Oops: Exception in kernel mode, sig: 5 [#1]
> [   21.963090] LE PAGE_SIZE=64K MMU=Radix MMU=Hash SMP NR_CPUS=2048 NUMA
> pSeries
> [   21.963131] Modules linked in: xt_tcpudp iptable_filter squashfs fuse
> vmx_crypto ib_iser rdma_cm iw_cm ib_cm ib_core libiscsi scsi_transport_iscsi
> ip_tables x_tables autofs4 btrfs zstd_decompress zstd_compress lzo_compress
> raid10 raid456 async_raid6_recov async_memcpy async_pq async_xor async_tx
> xor raid6_pq multipath crc32c_vpmsum
> [   21.963281] CPU: 11 PID: 316 Comm: kworker/u64:5 Kdump: loaded Not
> tainted 5.1.0-dirty #2
> [   21.963323] Workqueue: pseries hotplug workque pseries_hp_work_fn
> [   21.963355] NIP:  c0079e18 LR: c0c79308 CTR:
> 8000
> [   21.963392] REGS: c003f88034f0 TRAP: 0700   Not tainted (5.1.0-dirty)
> [   21.963422] MSR:  8282b033   CR:
> 28002884  XER: 2004
> [   21.963470] CFAR: c0c79304 IRQMASK: 0
> [   21.963470] GPR00: c0c79308 c003f8803780 c1521000
> 00fff8c0
> [   21.963470] GPR04: 0001 ffe30005 0005
> 0020
> [   21.963470] GPR08:  0001 c00a00fff8e0
> c16d21a0
> [   21.963470] GPR12: c16e7b90 c7ff2700 c00a00a0
> c003ffe30100
> [   21.963470] GPR16: c003ffe3 c14aa4de c00a009f
> c16d21b0
> [   21.963470] GPR20: c14de588 0001 c16d21b8
> c00a00a0
> [   21.963470] GPR24:   c00a00a0
> c003ffe96000
> [   21.963470] GPR28: c00a00a0 c00a00a0 c003fffec000
> c00a00fff8c0
> [   21.963802] NIP [c0079e18] pte_fragment_free+0x48/0xd0
> [   21.963838] LR [c0c79308] remove_pagetable+0x49c/0x5b4
> [   21.963873] Call Trace:
> [   21.963890] [c003f8803780] [c003ffe997f0] 0xc003ffe997f0
> (unreliable)
> [   21.963933] [c003f88037b0] [] (null)
> [   21.963969] [c003f88038c0] [c006f038]
> vmemmap_free+0x218/0x2e0
> [   21.964006] [c003f8803940] [c036f100]
> sparse_remove_one_section+0xd0/0x138
> [   21.964050] [c003f8803980] [c0383a50]
> __remove_pages+0x410/0x560
> [   21.964093] [c003f8803a90] [c0c784d8]
> arch_remove_memory+0x68/0xdc
> [   21.964136] [c003f8803ad0] [c0385d74]
> __remove_memory+0xc4/0x110
> [   21.964180] [c003f8803b10] [c00d44e4]
> dlpar_remove_lmb+0x94/0x140
> [   21.964223] [c003f8803b50] [c00d52b4]
> dlpar_memory+0x464/0xd00
> [   21.964259] [c003f8803be0] [c00cd5c0]
> handle_dlpar_errorlog+0xc0/0x190
> [   21.964303] [c003f8803c50] [c00cd6bc]
> pseries_hp_work_fn+0x2c/0x60
> [   21.964346] [c003f8803c80] [c013a4a0]
> process_one_work+0x2b0/0x5a0
> [   21.964388] [c003f8803d10] [c013a818]
> worker_thread+0x88/0x610
> [   21.964434] [c003f8803db0] [c0143884] kthread+0x1a4/0x1b0
> [   21.964468] [c003f8803e20] [c000bdc4]
> ret_from_kernel_thread+0x5c/0x78
> [   21.964506] Instruction dump:
> [   21.964527] fbe1fff8 f821ffd1 78638502 78633664 ebe9 7fff1a14
> 395f0020 813f0020
> [   21.964569] 7d2907b4 7d2900d0 79290fe0 69290001 <0b09> 7c0004ac
> 7d205028 3129
> [   21.964613] ---[ end trace aaa571aa1636fee6 ]---
> [   21.966349]
> [   21.966383] Sending IPI to other CPUs
> [   21.978335] IPI complete
> [   21.981354] kexec: Starting switchover sequence.
> I'm in purgatory

git bisect points to

commit 4231aba000f5a4583dd9f67057aadb68c3eca99d
Author: Nicholas Piggin 
Date:   Fri Jul 27 21:48:17 2018 +1000

powerpc/64s: Fix page table fragment refcount race vs speculative references

The page table fragment allocator uses the main page refcount racily
with respect to speculative references. A customer observed a BUG due
to page table page refcount underflow in the fragment allocator. This
can be caused by the fragment allocator set_page_count stomping on a

Re: [PATCH] powerpc/mm/hash: Improve address limit checks

2019-05-18 Thread Michael Ellerman
On Thu, 2019-05-16 at 11:50:54 UTC, "Aneesh Kumar K.V" wrote:
> Different parts of the code do the limit check by ignoring the top nibble
> of the EA, i.e. we do checks like
> 
>   if ((ea & EA_MASK) >= H_PGTABLE_RANGE)
>   error
> 
> This patch makes sure we don't insert SLB entries for addresses whose top
> nibble doesn't match the ignored bits.
> 
> With an address like 0x4800, we can end up with wrong SLB entries like
> 
> 13 4800 400ea1b217000510   1T ESID=   40 VSID=   ea1b217000 LLP:110
> 
> Without this patch we will map that EA with LINEAR_MAP_REGION_ID and,
> further, those address limit checks will return false.
> 
> Signed-off-by: Aneesh Kumar K.V 

Applied to powerpc fixes, thanks.

https://git.kernel.org/powerpc/c/c179976cf4cbd2e65f29741d5bc07ccf

cheers


Re: [PATCH] powerpc/mm: Drop VM_BUG_ON in get_region_id

2019-05-18 Thread Michael Ellerman
On Wed, 2019-05-15 at 09:45:23 UTC, "Aneesh Kumar K.V" wrote:
> We can call get_region_id without validating the ea value. That means
> with a wrong ea value we hit the BUG as below.
> 
>  kernel BUG at arch/powerpc/include/asm/book3s/64/hash.h:129!
>  Oops: Exception in kernel mode, sig: 5 [#1]
>  LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
>  CPU: 0 PID: 3937 Comm: access_tests Not tainted 5.1.0
>  
>  NIP [c007ba20] do_slb_fault+0x70/0x320
>  LR [c000896c] data_access_slb_common+0x15c/0x1a0
> 
> Fix this by removing the VM_BUG_ON. All callers make sure the returned
> region id is valid and error out otherwise.
> 
> Fixes: 0034d395f89d ("powerpc/mm/hash64: Map all the kernel regions in the 
> same 0xc range")
> Signed-off-by: Aneesh Kumar K.V 

Applied to powerpc fixes, thanks.

https://git.kernel.org/powerpc/c/6457f42eb3f6e9552366631bd5aeb096

cheers


Re: [PATCH] powerpc: Remove double free

2019-05-18 Thread Michael Ellerman
On Wed, 2019-05-15 at 09:07:50 UTC, "Tobin C. Harding" wrote:
> kfree() after kobject_put(). Whoever wrote this was on crack.
> 
> Fixes: 7e8039795a80 ("powerpc/cacheinfo: Fix kobject memleak")
> Signed-off-by: Tobin C. Harding 

Applied to powerpc fixes, thanks.

https://git.kernel.org/powerpc/c/672eaf37db9f99fd794eed2c68a8b3b0

cheers


Re: [PATCH] powerpc/mm: Fix crashes with hugepages & 4K pages

2019-05-18 Thread Michael Ellerman
On Tue, 2019-05-14 at 13:43:21 UTC, Michael Ellerman wrote:
> The recent commit to cleanup ifdefs in the hugepage initialisation led
> to crashes when using 4K pages as reported by Sachin:
> 
>   BUG: Kernel NULL pointer dereference at 0x001c
>   Faulting instruction address: 0xc1d1e58c
>   Oops: Kernel access of bad area, sig: 11 [#1]
>   LE PAGE_SIZE=4K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
>   ...
>   CPU: 3 PID: 4635 Comm: futex_wake04 Tainted: GW  O  
> 5.1.0-next-20190507-autotest #1
>   NIP:  c1d1e58c LR: c1d1e54c CTR: 
>   REGS: c4937890 TRAP: 0300
>   MSR:  80009033   CR: 22424822  XER: 
>   CFAR: c183e9e0 DAR: 001c DSISR: 4000 IRQMASK: 0
>   ...
>   NIP kmem_cache_alloc+0xbc/0x5a0
>   LR  kmem_cache_alloc+0x7c/0x5a0
>   Call Trace:
> huge_pte_alloc+0x580/0x950
> hugetlb_fault+0x9a0/0x1250
> handle_mm_fault+0x490/0x4a0
> __do_page_fault+0x77c/0x1f00
> do_page_fault+0x28/0x50
> handle_page_fault+0x18/0x38
> 
> This is caused by us trying to allocate from a NULL kmem cache in
> __hugepte_alloc(). The kmem cache is NULL because it was never
> allocated in hugetlbpage_init(), because add_huge_page_size() returned
> an error.
> 
> The reason add_huge_page_size() returned an error is a simple typo, we
> are calling check_and_get_huge_psize(size) when we should be passing
> shift instead.
> 
> The fact that we're able to trigger this path when the kmem caches are
> NULL is a separate bug, ie. we should not advertise any hugepage sizes
> if we haven't setup the required caches for them.
> 
> This was only seen with 4K pages, with 64K pages we don't need to
> allocate any extra kmem caches because the 16M hugepage just occupies
> a single entry at the PMD level.
> 
> Fixes: 723f268f19da ("powerpc/mm: cleanup ifdef mess in add_huge_page_size()")
> Reported-by: Sachin Sant 
> Tested-by: Sachin Sant 
> Signed-off-by: Michael Ellerman 
> Reviewed-by: Christophe Leroy 
> Reviewed-by: Aneesh Kumar K.V 

Applied to powerpc fixes.

https://git.kernel.org/powerpc/c/7338874c337f01dc84597a5500a58873

cheers


Re: [PATCH] powerpc/32s: fix flush_hash_pages() on SMP

2019-05-18 Thread Michael Ellerman
On Thu, 2019-05-09 at 12:59:38 UTC, Christophe Leroy wrote:
> flush_hash_pages() runs with data translation off, so the current
> task_struct has to be accessed using its physical address.
> 
> Reported-by: Erhard F. 
> Fixes: f7354ccac844 ("powerpc/32: Remove CURRENT_THREAD_INFO and rename 
> TI_CPU")
> Cc: sta...@vger.kernel.org
> Signed-off-by: Christophe Leroy 

Applied to powerpc fixes, thanks.

https://git.kernel.org/powerpc/c/397d2300b08cdee052053e362018cdb6

cheers


[GIT PULL] Please pull powerpc/linux.git powerpc-5.2-2 tag

2019-05-18 Thread Michael Ellerman
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA1

Hi Linus,

Please pull some powerpc fixes for 5.2:

The following changes since commit b970afcfcabd63cd3832e95db096439c177c3592:

  Merge tag 'powerpc-5.2-1' of ssh://gitolite.kernel.org/pub/scm/linux/kernel/git/powerpc/linux (2019-05-10 05:29:27 -0700)

are available in the git repository at:

  https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git tags/powerpc-5.2-2

for you to fetch changes up to 672eaf37db9f99fd794eed2c68a8b3b05d484081:

  powerpc/cacheinfo: Remove double free (2019-05-17 23:28:00 +1000)

- --
powerpc fixes for 5.2 #2

One fix going back to stable, for a bug on 32-bit introduced when we added
support for THREAD_INFO_IN_TASK.

A fix for a typo in a recent rework of our hugetlb code that leads to crashes on
64-bit when using hugetlbfs with a 4K PAGE_SIZE.

Two fixes for our recent rework of the address layout on 64-bit hash CPUs, both
only triggered when userspace tries to access addresses outside the user or
kernel address ranges.

Finally a fix for a recently introduced double free in an error path in our
cacheinfo code.

Thanks to:
  Aneesh Kumar K.V, Christophe Leroy, Sachin Sant, Tobin C. Harding.

- --
Aneesh Kumar K.V (2):
  powerpc/mm: Drop VM_BUG_ON in get_region_id()
  powerpc/mm/hash: Fix get_region_id() for invalid addresses

Christophe Leroy (1):
  powerpc/32s: fix flush_hash_pages() on SMP

Michael Ellerman (1):
  powerpc/mm: Fix crashes with hugepages & 4K pages

Tobin C. Harding (1):
  powerpc/cacheinfo: Remove double free


 arch/powerpc/include/asm/book3s/64/hash.h | 6 --
 arch/powerpc/kernel/cacheinfo.c   | 1 -
 arch/powerpc/mm/book3s32/hash_low.S   | 3 ++-
 arch/powerpc/mm/hugetlbpage.c | 2 +-
 4 files changed, 7 insertions(+), 5 deletions(-)
-BEGIN PGP SIGNATURE-

iQIcBAEBAgAGBQJc3+h8AAoJEFHr6jzI4aWA8wQQAK6ChAWbSy5Xb8E9q/txwxhV
tCjg+CEbs4a5WpVPnL7oZm/axUEJVRDk++f1MydxnB2C6TDk0rBM20Umeh548ALP
RWe65asxMHHN+TihSSLf1kN5M+/6gnkRysg++u54KH0RU1fMj+KG+NudcFABJPZ/
5x7Pr1Ffy3FtEqlFAA3vjxioHOFA9aWB1nmjiW8osf01R7KQXNoZk7IX3dZ8YQce
aALvCx/OXqxJtiAj8ynb422nkBgDtwR5haEKZ8gxeVLE1hPnIXaSZfRHYvk3GvYY
t0q37adNeLR2a4UNB1dMaTiNn+6qANJO9tC4ITSsH5KzxIFJ6GI6fMfp+XDxI/pY
7CLAzO5WO466ar/XnJN1Z6cJXfC/WT8h9delzErFm8omEE2wHd+XsxffM7ToATh8
/ZMEip+gmKRU08qm46gU8o823Qpp0A85f5d5LJHjVWRomXTKovTNQvveBCIEB7H4
KKiWPOdo/vTUKh68l6ROR6g96qQbDjnzDZ5yUBOmHE1XXGjJqURtlm8hkhUO6/ws
+awGShDim6nfpOwqlDrpnu9wB0W7HxUDU32pvl0bi5VSJyZckPzaPB2MoVuIf5Aa
8i0x231708Kk8htB4n6tD7gJtrTWEZG3dyRwYZzU1FKKzrbItbDFcwN3H9O+QU5H
Xgk5Sd3CAiEQMbDE+t5x
=Iuyr
-END PGP SIGNATURE-


Re: [PATCH v2] powerpc/mm/hash: Fix get_region_id() for invalid addresses

2019-05-18 Thread Michael Ellerman
Nicholas Piggin  writes:
> Michael Ellerman's on May 17, 2019 11:29 pm:
>> From: "Aneesh Kumar K.V" 
>> 
>> Accesses by userspace to random addresses outside the user or kernel
>> address range will generate an SLB fault. When we handle that fault we
>> classify the effective address into several classes, eg. user, kernel
>> linear, kernel virtual etc.
>> 
>> For addresses that are completely outside of any valid range, we
>> should not insert an SLB entry at all, and instead immediately take an
>> exception.
>> 
>> In the past this was handled in two ways. Firstly we would check the
>> top nibble of the address (using REGION_ID(ea)) and that would tell us
>> if the address was user (0), kernel linear (c), kernel virtual (d), or
>> vmemmap (f). If the address didn't match any of these it was invalid.
>> 
>> Then for each type of address we would do a secondary check. For the
>> user region we check against H_PGTABLE_RANGE, for kernel linear we
>> would mask the top nibble of the address and then check the address
>> against MAX_PHYSMEM_BITS.
>> 
>> As part of commit 0034d395f89d ("powerpc/mm/hash64: Map all the kernel
>> regions in the same 0xc range") we replaced REGION_ID() with
>> get_region_id() and changed the masking of the top nibble to only mask
>> the top two bits, which introduced a bug.
>> 
>> Addresses less than (4 << 60) are still handled correctly, they are
>> either less than (1 << 60) in which case they are subject to the
>> H_PGTABLE_RANGE check, or they are correctly checked against
>> MAX_PHYSMEM_BITS.
>> 
>> However addresses from (4 << 60) to ((0xc << 60) - 1), are incorrectly
>> treated as kernel linear addresses in get_region_id(). Then the top
>> two bits are cleared by EA_MASK in slb_allocate_kernel() and the
>> address is checked against MAX_PHYSMEM_BITS, which it passes due to
>> the masking. The end result is we incorrectly insert SLB entries for
>> those addresses.
>> 
>> That is not actually catastrophic, having inserted the SLB entry we
>> will then go on to take a page fault for the address and at that point
>> we detect the problem and report it as a bad fault.
>> 
>> Still we should not be inserting those entries, or treating them as
>> kernel linear addresses in the first place. So fix get_region_id() to
>> detect addresses in that range and return an invalid region id, which
>> causes us to not insert an SLB entry and to directly report an
>> exception.
>> 
>> Fixes: 0034d395f89d ("powerpc/mm/hash64: Map all the kernel regions in the 
>> same 0xc range")
>> Signed-off-by: Aneesh Kumar K.V 
>> [mpe: Drop change to EA_MASK for now, rewrite change log]
>> Signed-off-by: Michael Ellerman 
>
> Looks good to me.

Thanks for reviewing.

cheers


Re: [PATCH v1 1/2] pid: add pidfd_open()

2019-05-18 Thread Christian Brauner
On Sat, May 18, 2019 at 05:48:03AM -0400, Joel Fernandes wrote:
> Hi Christian,
> 
> For next revision, could you also CC sur...@google.com as well? He is also
> working on the low memory killer. And also suggest CC to
> kernel-t...@android.com. And mentioned some comments below, thanks.

Yip, totally. Just added them both to my Cc list. :)
(I saw you added Suren manually. I added the Android kernel team now too.)

> 
> On Thu, May 16, 2019 at 03:59:42PM +0200, Christian Brauner wrote:
> [snip]  
> > diff --git a/kernel/pid.c b/kernel/pid.c
> > index 20881598bdfa..4afca3d6dcb8 100644
> > --- a/kernel/pid.c
> > +++ b/kernel/pid.c
> > @@ -38,6 +38,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >  
> > @@ -451,6 +452,55 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
> > 	return idr_get_next(&ns->idr, &nr);
> >  }
> >  
> > +/**
> > + * pidfd_open() - Open new pid file descriptor.
> > + *
> > + * @pid:   pid for which to retrieve a pidfd
> > + * @flags: flags to pass
> > + *
> > + * This creates a new pid file descriptor with the O_CLOEXEC flag set for
> > + * the process identified by @pid. Currently, the process identified by
> > + * @pid must be a thread-group leader. This restriction currently exists
> > + * for all aspects of pidfds including pidfd creation (CLONE_PIDFD cannot
> > + * be used with CLONE_THREAD) and pidfd polling (only supports thread group
> > + * leaders).
> > + *
> > + * Return: On success, a cloexec pidfd is returned.
> > + * On error, a negative errno number will be returned.
> > + */
> > +SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
> > +{
> > +   int fd, ret;
> > +   struct pid *p;
> > +   struct task_struct *tsk;
> > +
> > +   if (flags)
> > +   return -EINVAL;
> > +
> > +   if (pid <= 0)
> > +   return -EINVAL;
> > +
> > +   p = find_get_pid(pid);
> > +   if (!p)
> > +   return -ESRCH;
> > +
> > +   ret = 0;
> > +   rcu_read_lock();
> > +   /*
> > +* If this returns non-NULL the pid was used as a thread-group
> > +* leader. Note, we race with exec here: If it changes the
> > +* thread-group leader we might return the old leader.
> > +*/
> > +   tsk = pid_task(p, PIDTYPE_TGID);
> 
> Just trying to understand the comment here. The issue is that we might either
> return the new leader, or the old leader depending on the overlap with
> concurrent de_thread right? In either case, we don't care though.
> 
> I suggest to remove the "Note..." part of the comment since it doesn't seem 
> the
> race is relevant here unless we are doing something else with tsk in the
> function, but if you want to keep it that's also fine. Comment text should
> probably be 'return the new leader' though.

Nah, I actually removed the comment already independently (cf. see [1]).

[1]: https://git.kernel.org/pub/scm/linux/kernel/git/brauner/linux.git/commit/?h=pidfd_open&id=dcfc98c2d957bf3ac14b06414cb2cf4c673fc297
> 
> > +   if (!tsk)
> > +   ret = -ESRCH;
> 
> Perhaps -EINVAL?  AFAICS, this can only happen if a CLONE_THREAD pid was
> passed as argument to pidfd_open which is invalid. But let me know what you
> had in mind..

Hm, from the kernel's perspective ESRCH is correct but I guess EINVAL is
nicer for userspace. So I don't have objections to using EINVAL. My
first version did too I think.

Thanks!
Christian


Re: [PATCH v1 1/2] pid: add pidfd_open()

2019-05-18 Thread Joel Fernandes
Hi Christian,

For next revision, could you also CC sur...@google.com as well? He is also
working on the low memory killer. And also suggest CC to
kernel-t...@android.com. And mentioned some comments below, thanks.

On Thu, May 16, 2019 at 03:59:42PM +0200, Christian Brauner wrote:
[snip]  
> diff --git a/kernel/pid.c b/kernel/pid.c
> index 20881598bdfa..4afca3d6dcb8 100644
> --- a/kernel/pid.c
> +++ b/kernel/pid.c
> @@ -38,6 +38,7 @@
>  #include 
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  
> @@ -451,6 +452,55 @@ struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
>   return idr_get_next(&ns->idr, &nr);
>  }
>  
> +/**
> + * pidfd_open() - Open new pid file descriptor.
> + *
> + * @pid:   pid for which to retrieve a pidfd
> + * @flags: flags to pass
> + *
> + * This creates a new pid file descriptor with the O_CLOEXEC flag set for
> + * the process identified by @pid. Currently, the process identified by
> + * @pid must be a thread-group leader. This restriction currently exists
> + * for all aspects of pidfds including pidfd creation (CLONE_PIDFD cannot
> + * be used with CLONE_THREAD) and pidfd polling (only supports thread group
> + * leaders).
> + *
> + * Return: On success, a cloexec pidfd is returned.
> + * On error, a negative errno number will be returned.
> + */
> +SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
> +{
> + int fd, ret;
> + struct pid *p;
> + struct task_struct *tsk;
> +
> + if (flags)
> + return -EINVAL;
> +
> + if (pid <= 0)
> + return -EINVAL;
> +
> + p = find_get_pid(pid);
> + if (!p)
> + return -ESRCH;
> +
> + ret = 0;
> + rcu_read_lock();
> + /*
> +  * If this returns non-NULL the pid was used as a thread-group
> +  * leader. Note, we race with exec here: If it changes the
> +  * thread-group leader we might return the old leader.
> +  */
> + tsk = pid_task(p, PIDTYPE_TGID);

Just trying to understand the comment here. The issue is that we might either
return the new leader, or the old leader depending on the overlap with
concurrent de_thread right? In either case, we don't care though.

I suggest to remove the "Note..." part of the comment since it doesn't seem the
race is relevant here unless we are doing something else with tsk in the
function, but if you want to keep it that's also fine. Comment text should
probably be 'return the new leader' though.

> + if (!tsk)
> + ret = -ESRCH;

Perhaps -EINVAL?  AFAICS, this can only happen if a CLONE_THREAD pid was
passed as argument to pidfd_open which is invalid. But let me know what you
had in mind..

thanks,

 - Joel

> + rcu_read_unlock();
> +
> + fd = ret ?: pidfd_create(p);
> + put_pid(p);
> + return fd;
> +}
> +
>  void __init pid_idr_init(void)
>  {
>   /* Verify no one has done anything silly: */
> -- 
> 2.21.0
>