[PATCH] cpufreq: powernv: Don't assume distinct pstate values for nominal and pmin

2018-01-11 Thread Shilpasri G Bhat
Some OpenPOWER boxes can have the same pstate values for the nominal
and pmin pstates. In these boxes the current code will not initialize
the 'powernv_pstate_info.min' variable, resulting in erroneous CPU
frequency reporting. This patch fixes this problem.

Fixes: 09ca4c9b5958 ("cpufreq: powernv: Replacing pstate_id with frequency 
table index")
Reported-by: Alvin Wang 
Signed-off-by: Shilpasri G Bhat 
---
 drivers/cpufreq/powernv-cpufreq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/powernv-cpufreq.c 
b/drivers/cpufreq/powernv-cpufreq.c
index b6d7c4c..da7fdb4 100644
--- a/drivers/cpufreq/powernv-cpufreq.c
+++ b/drivers/cpufreq/powernv-cpufreq.c
@@ -288,9 +288,9 @@ static int init_powernv_pstates(void)
 
if (id == pstate_max)
powernv_pstate_info.max = i;
-   else if (id == pstate_nominal)
+   if (id == pstate_nominal)
powernv_pstate_info.nominal = i;
-   else if (id == pstate_min)
+   if (id == pstate_min)
powernv_pstate_info.min = i;
 
if (powernv_pstate_info.wof_enabled && id == pstate_turbo) {
-- 
1.8.3.1



[PATCH 2/2] powerpc/xive: Add interrupt flag to disable automatic EOI

2018-01-11 Thread Benjamin Herrenschmidt
This will be used by KVM in order to keep escalation interrupts
in the non-EOI (masked) state after they fire. They will be
re-enabled directly in HW by KVM when needed.

Signed-off-by: Benjamin Herrenschmidt 
---
 arch/powerpc/include/asm/xive.h   | 3 +++
 arch/powerpc/sysdev/xive/common.c | 3 ++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 0e77005cf021..b619a5585cd6 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -58,6 +58,9 @@ struct xive_irq_data {
 #define XIVE_IRQ_FLAG_EOI_FW   0x10
 #define XIVE_IRQ_FLAG_H_INT_ESB0x20
 
+/* Special flag set by KVM for excalation interrupts */
+#define XIVE_IRQ_NO_EOI0x80
+
 #define XIVE_INVALID_CHIP_ID   -1
 
 /* A queue tracking structure in a CPU */
diff --git a/arch/powerpc/sysdev/xive/common.c 
b/arch/powerpc/sysdev/xive/common.c
index 838ebdbfe4c5..40c06110821c 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -367,7 +367,8 @@ static void xive_irq_eoi(struct irq_data *d)
 * EOI the source if it hasn't been disabled and hasn't
 * been passed-through to a KVM guest
 */
-   if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d))
+   if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
+   !(xd->flags & XIVE_IRQ_NO_EOI))
xive_do_source_eoi(irqd_to_hwirq(d), xd);
 
/*
-- 
2.14.3



[PATCH 1/2] powerpc/xive: Move definition of ESB bits

2018-01-11 Thread Benjamin Herrenschmidt
From xive.h to xive-regs.h, since it's a HW register definition
and it can be used from assembly

Signed-off-by: Benjamin Herrenschmidt 
---
 arch/powerpc/include/asm/xive-regs.h | 35 +++
 arch/powerpc/include/asm/xive.h  | 35 ---
 2 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/arch/powerpc/include/asm/xive-regs.h 
b/arch/powerpc/include/asm/xive-regs.h
index 1d3f2be5ae39..fa4288822b68 100644
--- a/arch/powerpc/include/asm/xive-regs.h
+++ b/arch/powerpc/include/asm/xive-regs.h
@@ -9,6 +9,41 @@
 #ifndef _ASM_POWERPC_XIVE_REGS_H
 #define _ASM_POWERPC_XIVE_REGS_H
 
+/*
+ * "magic" Event State Buffer (ESB) MMIO offsets.
+ *
+ * Each interrupt source has a 2-bit state machine called ESB
+ * which can be controlled by MMIO. It's made of 2 bits, P and
+ * Q. P indicates that an interrupt is pending (has been sent
+ * to a queue and is waiting for an EOI). Q indicates that the
+ * interrupt has been triggered while pending.
+ *
+ * This acts as a coalescing mechanism in order to guarantee
+ * that a given interrupt only occurs at most once in a queue.
+ *
+ * When doing an EOI, the Q bit will indicate if the interrupt
+ * needs to be re-triggered.
+ *
+ * The following offsets into the ESB MMIO allow to read or
+ * manipulate the PQ bits. They must be used with an 8-bytes
+ * load instruction. They all return the previous state of the
+ * interrupt (atomically).
+ *
+ * Additionally, some ESB pages support doing an EOI via a
+ * store at 0 and some ESBs support doing a trigger via a
+ * separate trigger page.
+ */
+#define XIVE_ESB_STORE_EOI 0x400 /* Store */
+#define XIVE_ESB_LOAD_EOI  0x000 /* Load */
+#define XIVE_ESB_GET   0x800 /* Load */
+#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */
+#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */
+#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */
+#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */
+
+#define XIVE_ESB_VAL_P 0x2
+#define XIVE_ESB_VAL_Q 0x1
+
 /*
  * Thread Management (aka "TM") registers
  */
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index 371fbebf1ec9..0e77005cf021 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -72,41 +72,6 @@ struct xive_q {
atomic_tpending_count;
 };
 
-/*
- * "magic" Event State Buffer (ESB) MMIO offsets.
- *
- * Each interrupt source has a 2-bit state machine called ESB
- * which can be controlled by MMIO. It's made of 2 bits, P and
- * Q. P indicates that an interrupt is pending (has been sent
- * to a queue and is waiting for an EOI). Q indicates that the
- * interrupt has been triggered while pending.
- *
- * This acts as a coalescing mechanism in order to guarantee
- * that a given interrupt only occurs at most once in a queue.
- *
- * When doing an EOI, the Q bit will indicate if the interrupt
- * needs to be re-triggered.
- *
- * The following offsets into the ESB MMIO allow to read or
- * manipulate the PQ bits. They must be used with an 8-bytes
- * load instruction. They all return the previous state of the
- * interrupt (atomically).
- *
- * Additionally, some ESB pages support doing an EOI via a
- * store at 0 and some ESBs support doing a trigger via a
- * separate trigger page.
- */
-#define XIVE_ESB_STORE_EOI 0x400 /* Store */
-#define XIVE_ESB_LOAD_EOI  0x000 /* Load */
-#define XIVE_ESB_GET   0x800 /* Load */
-#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */
-#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */
-#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */
-#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */
-
-#define XIVE_ESB_VAL_P 0x2
-#define XIVE_ESB_VAL_Q 0x1
-
 /* Global enable flags for the XIVE support */
 extern bool __xive_enabled;
 
-- 
2.14.3



Re: [PATCH 00/26] KVM: PPC: Book3S PR: Transaction memory support on PR KVM

2018-01-11 Thread Simon Guo
Hi Gustavo,
On Thu, Jan 11, 2018 at 11:56:59AM -0200, Gustavo Romero wrote:
> Hi Simon,
> 
> On 01/11/2018 08:11 AM, wei.guo.si...@gmail.com wrote:
> > From: Simon Guo 
> > 
> > In current days, many OS distributions have utilized transaction
> > memory functionality. In PowerPC, HV KVM supports TM. But PR KVM
> > does not.
> > 
> > The drive for the transaction memory support of PR KVM is the
> > openstack Continuous Integration testing - They runs a HV(hypervisor)
> > KVM(as level 1) and then run PR KVM(as level 2) on top of that.
> > 
> > This patch set add transaction memory support on PR KVM.
> 
> Is this correct to assume that this emulation mode will just kick in on P9
> with kernel TM workarounds and HV KVM will continue to be used on POWER8
> since HV KVM is supported on POWER8 hosts?

As Ben mentioned, this patch set aims to enhance PR KVM on Power8
to support transactional memory.

Thanks,
- Simon

> 
> 
> Regards,
> Gustavo
> 
> > Test cases performed:
> > linux/tools/testing/selftests/powerpc/tm/tm-syscall
> > linux/tools/testing/selftests/powerpc/tm/tm-fork
> > linux/tools/testing/selftests/powerpc/tm/tm-vmx-unavail
> > linux/tools/testing/selftests/powerpc/tm/tm-tmspr
> > linux/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv
> > linux/tools/testing/selftests/powerpc/math/vsx_preempt
> > linux/tools/testing/selftests/powerpc/math/fpu_signal
> > linux/tools/testing/selftests/powerpc/math/vmx_preempt
> > linux/tools/testing/selftests/powerpc/math/fpu_syscall
> > linux/tools/testing/selftests/powerpc/math/vmx_syscall
> > linux/tools/testing/selftests/powerpc/math/fpu_preempt
> > linux/tools/testing/selftests/powerpc/math/vmx_signal
> > linux/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr
> > linux/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr
> > linux/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx
> > linux/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr
> > linux/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx
> > https://github.com/justdoitqd/publicFiles/blob/master/test_tbegin_pr.c
> > https://github.com/justdoitqd/publicFiles/blob/master/test_tabort.c
> > https://github.com/justdoitqd/publicFiles/blob/master/test_kvm_htm_cap.c
> > 
> > Simon Guo (25):
> >   KVM: PPC: Book3S PR: Move kvmppc_save_tm/kvmppc_restore_tm to separate
> > file
> >   KVM: PPC: Book3S PR: add new parameter (guest MSR) for
> > kvmppc_save_tm()/kvmppc_restore_tm()
> >   KVM: PPC: Book3S PR: turn on FP/VSX/VMX MSR bits in kvmppc_save_tm()
> >   KVM: PPC: Book3S PR: add C function wrapper for
> > _kvmppc_save/restore_tm()
> >   KVM: PPC: Book3S PR: In PR KVM suspends Transactional state when
> > inject an interrupt.
> >   KVM: PPC: Book3S PR: PR KVM pass through MSR TM/TS bits to shadow_msr.
> >   KVM: PPC: Book3S PR: add TEXASR related macros
> >   KVM: PPC: Book3S PR: Sync TM bits to shadow msr for problem state
> > guest
> >   KVM: PPC: Book3S PR: implement RFID TM behavior to suppress change
> > from S0 to N0
> >   KVM: PPC: Book3S PR: set MSR HV bit accordingly for PPC970 and others.
> >   KVM: PPC: Book3S PR: prevent TS bits change in kvmppc_interrupt_pr()
> >   powerpc: export symbol msr_check_and_set().
> >   KVM: PPC: Book3S PR: adds new
> > kvmppc_copyto_vcpu_tm/kvmppc_copyfrom_vcpu_tm API for PR KVM.
> >   KVM: PPC: Book3S PR: export tm_enable()/tm_disable/tm_abort() APIs
> >   KVM: PPC: Book3S PR: add kvmppc_save/restore_tm_sprs() APIs
> >   KVM: PPC: Book3S PR: add transaction memory save/restore skeleton for
> > PR KVM
> >   KVM: PPC: Book3S PR: add math support for PR KVM HTM
> >   KVM: PPC: Book3S PR: make mtspr/mfspr emulation behavior based on
> > active TM SPRs
> >   KVM: PPC: Book3S PR: always fail transaction in guest privilege state
> >   KVM: PPC: Book3S PR: enable NV reg restore for reading TM SPR at guest
> > privilege state
> >   KVM: PPC: Book3S PR: adds emulation for treclaim.
> >   KVM: PPC: Book3S PR: add emulation for trechkpt in PR KVM.
> >   KVM: PPC: Book3S PR: add emulation for tabort. for privilege guest
> >   KVM: PPC: Book3S PR: add guard code to prevent returning to guest with
> > PR=0 and Transactional state
> >   KVM: PPC: Book3S PR: enable HTM for PR KVM for KVM_CHECK_EXTENSION
> > ioctl
> > 
> >  arch/powerpc/include/asm/asm-prototypes.h   |  10 +
> >  arch/powerpc/include/asm/kvm_book3s.h   |   8 +
> >  arch/powerpc/include/asm/kvm_host.h |   3 +
> >  arch/powerpc/include/asm/reg.h  |  25 +-
> >  arch/powerpc/include/asm/tm.h   |   2 -
> >  arch/powerpc/include/uapi/asm/tm.h  |   2 +-
> >  arch/powerpc/kernel/process.c   |   1 +
> >  arch/powerpc/kernel/tm.S|  12 +
> >  arch/powerpc/kvm/Makefile   |   3 +
> >  arch/powerpc/kvm/book3s.h   |   1 +
> >  arch/powerpc/kvm/book3s_64_mmu.c|  11 +-
> >  arch/powerpc/kvm/book3s_emulate.c   | 

[PATCH v4 5/6] powerpc/kvm/xive: Make xive_pushed a byte, not a word

2018-01-11 Thread Benjamin Herrenschmidt
Signed-off-by: Benjamin Herrenschmidt 
---
 arch/powerpc/include/asm/kvm_host.h | 2 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index bfe51356af5e..0c44fa67608d 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -739,7 +739,7 @@ struct kvm_vcpu_arch {
struct kvmppc_icp *icp; /* XICS presentation controller */
struct kvmppc_xive_vcpu *xive_vcpu; /* XIVE virtual CPU data */
__be32 xive_cam_word;/* Cooked W2 in proper endian with valid bit */
-   u32 xive_pushed; /* Is the VP pushed on the physical CPU ? */
+   u8 xive_pushed;  /* Is the VP pushed on the physical CPU ? */
union xive_tma_w01 xive_saved_state; /* W0..1 of XIVE thread state */
 #endif
 
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 506a1c775370..46dc250d072a 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1031,7 +1031,7 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
li  r9, TM_QW1_OS + TM_WORD2
stwcix  r11,r9,r10
li  r9, 1
-   stw r9, VCPU_XIVE_PUSHED(r4)
+   stb r9, VCPU_XIVE_PUSHED(r4)
eieio
 
/*
@@ -1436,7 +1436,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
 guest_exit_cont:   /* r9 = vcpu, r12 = trap, r13 = paca */
 #ifdef CONFIG_KVM_XICS
/* We are exiting, pull the VP from the XIVE */
-   lwz r0, VCPU_XIVE_PUSHED(r9)
+   lbz r0, VCPU_XIVE_PUSHED(r9)
cmpwi   cr0, r0, 0
beq 1f
li  r7, TM_SPC_PULL_OS_CTX
@@ -1465,7 +1465,7 @@ guest_exit_cont:  /* r9 = vcpu, r12 = trap, r13 = 
paca */
/* Fixup some of the state for the next load */
li  r10, 0
li  r0, 0xff
-   stw r10, VCPU_XIVE_PUSHED(r9)
+   stb r10, VCPU_XIVE_PUSHED(r9)
stb r10, (VCPU_XIVE_SAVED_STATE+3)(r9)
stb r0, (VCPU_XIVE_SAVED_STATE+4)(r9)
eieio
-- 
2.14.3



[PATCH v4 2/6] powerpc/kvm/xive: Enable use of the new "single escalation" feature

2018-01-11 Thread Benjamin Herrenschmidt
That feature, provided by Power9 DD2.0 and later, when supported
by newer OPAL versions, allows to sacrifice a queue (priority 7)
in favor of merging all the escalation interrupts of the queues
of a single VP into a single interrupt.

This reduces the number of host interrupts used up by KVM guests
especially when those guests use multiple priorities.

It will also enable a future change to control the masking of the
escalation interrupts more precisely to avoid spurious ones.

Signed-off-by: Benjamin Herrenschmidt 
---
 arch/powerpc/include/asm/opal-api.h |  1 +
 arch/powerpc/include/asm/xive.h |  3 ++-
 arch/powerpc/kvm/book3s_xive.c  | 48 -
 arch/powerpc/kvm/book3s_xive.h  | 15 +---
 arch/powerpc/sysdev/xive/native.c   | 18 --
 5 files changed, 57 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index 233c7504b1f2..fc926743647e 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -1073,6 +1073,7 @@ enum {
 /* Flags for OPAL_XIVE_GET/SET_VP_INFO */
 enum {
OPAL_XIVE_VP_ENABLED= 0x0001,
+   OPAL_XIVE_VP_SINGLE_ESCALATION  = 0x0002,
 };
 
 /* "Any chip" replacement for chip ID for allocation functions */
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index b619a5585cd6..e602903c3029 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -111,9 +111,10 @@ extern void xive_native_disable_queue(u32 vp_id, struct 
xive_q *q, u8 prio);
 
 extern void xive_native_sync_source(u32 hw_irq);
 extern bool is_xive_irq(struct irq_chip *chip);
-extern int xive_native_enable_vp(u32 vp_id);
+extern int xive_native_enable_vp(u32 vp_id, bool single_escalation);
 extern int xive_native_disable_vp(u32 vp_id);
 extern int xive_native_get_vp_info(u32 vp_id, u32 *out_cam_id, u32 
*out_chip_id);
+extern bool xive_native_has_single_escalation(void);
 
 #else
 
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index cd21c891be9e..87f814e81e7d 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -112,19 +112,21 @@ static int xive_attach_escalation(struct kvm_vcpu *vcpu, 
u8 prio)
return -EIO;
}
 
-   /*
-* Future improvement: start with them disabled
-* and handle DD2 and later scheme of merged escalation
-* interrupts
-*/
-   name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
-vcpu->kvm->arch.lpid, xc->server_num, prio);
+   if (xc->xive->single_escalation)
+   name = kasprintf(GFP_KERNEL, "kvm-%d-%d",
+vcpu->kvm->arch.lpid, xc->server_num);
+   else
+   name = kasprintf(GFP_KERNEL, "kvm-%d-%d-%d",
+vcpu->kvm->arch.lpid, xc->server_num, prio);
if (!name) {
pr_err("Failed to allocate escalation irq name for queue %d of 
VCPU %d\n",
   prio, xc->server_num);
rc = -ENOMEM;
goto error;
}
+
+   pr_devel("Escalation %s irq %d (prio %d)\n", name, xc->esc_virq[prio], 
prio);
+
rc = request_irq(xc->esc_virq[prio], xive_esc_irq,
 IRQF_NO_THREAD, name, vcpu);
if (rc) {
@@ -191,12 +193,12 @@ static int xive_check_provisioning(struct kvm *kvm, u8 
prio)
 
pr_devel("Provisioning prio... %d\n", prio);
 
-   /* Provision each VCPU and enable escalations */
+   /* Provision each VCPU and enable escalations if needed */
kvm_for_each_vcpu(i, vcpu, kvm) {
if (!vcpu->arch.xive_vcpu)
continue;
rc = xive_provision_queue(vcpu, prio);
-   if (rc == 0)
+   if (rc == 0 && !xive->single_escalation)
xive_attach_escalation(vcpu, prio);
if (rc)
return rc;
@@ -1082,6 +1084,7 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
/* Allocate IPI */
xc->vp_ipi = xive_native_alloc_irq();
if (!xc->vp_ipi) {
+   pr_err("Failed to allocate xive irq for VCPU IPI\n");
r = -EIO;
goto bail;
}
@@ -1091,19 +1094,34 @@ int kvmppc_xive_connect_vcpu(struct kvm_device *dev,
if (r)
goto bail;
 
+   /*
+* Enable the VP first as the single escalation mode will
+* affect escalation interrupts numbering
+*/
+   r = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
+   if (r) {
+   pr_err("Failed to enable VP in OPAL, err %d\n", r);
+   goto bail;
+   }
+
/*
 * Initialize queues. Initially we set them all for no queueing
 * and we enable escalation for queue 0 only which 

[PATCH v4 6/6] powerpc/kvm/xive: Keep escalation interrupt masked unless ceded

2018-01-11 Thread Benjamin Herrenschmidt
This works on top of the single escalation support. When in single
escalation, with this change, we will keep the escalation interrupt
disabled unless the VCPU is in H_CEDE (idle). In any other case, we
know the VCPU will be rescheduled and thus there is no need to take
escalation interrupts in the host whenever a guest interrupt fires.

Signed-off-by: Benjamin Herrenschmidt 
---
 arch/powerpc/include/asm/kvm_host.h |  3 ++
 arch/powerpc/kernel/asm-offsets.c   |  3 ++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 62 -
 arch/powerpc/kvm/book3s_xive.c  | 30 
 4 files changed, 97 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 0c44fa67608d..fef8133becc8 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -740,7 +740,10 @@ struct kvm_vcpu_arch {
struct kvmppc_xive_vcpu *xive_vcpu; /* XIVE virtual CPU data */
__be32 xive_cam_word;/* Cooked W2 in proper endian with valid bit */
u8 xive_pushed;  /* Is the VP pushed on the physical CPU ? */
+   u8 xive_esc_on;  /* Is the escalation irq enabled ? */
union xive_tma_w01 xive_saved_state; /* W0..1 of XIVE thread state */
+   u64 xive_esc_raddr;  /* Escalation interrupt ESB real addr */
+   u64 xive_esc_vaddr;  /* Escalation interrupt ESB virt addr */
 #endif
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 825089cf3e23..1672dffd94e2 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -734,6 +734,9 @@ int main(void)
DEFINE(VCPU_XIVE_CAM_WORD, offsetof(struct kvm_vcpu,
arch.xive_cam_word));
DEFINE(VCPU_XIVE_PUSHED, offsetof(struct kvm_vcpu, arch.xive_pushed));
+   DEFINE(VCPU_XIVE_ESC_ON, offsetof(struct kvm_vcpu, arch.xive_esc_on));
+   DEFINE(VCPU_XIVE_ESC_RADDR, offsetof(struct kvm_vcpu, 
arch.xive_esc_raddr));
+   DEFINE(VCPU_XIVE_ESC_VADDR, offsetof(struct kvm_vcpu, 
arch.xive_esc_vaddr));
 #endif
 
 #ifdef CONFIG_KVM_EXIT_TIMING
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 46dc250d072a..b76878a648ef 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1043,6 +1043,41 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
 */
li  r0,0
stb r0, VCPU_IRQ_PENDING(r4)
+
+   /*
+* In single escalation mode, if the escalation interrupt is
+* on, we mask it.
+*/
+   lbz r0, VCPU_XIVE_ESC_ON(r4)
+   cmpwi   r0,0
+   beq 1f
+   ld  r10, VCPU_XIVE_ESC_RADDR(r4)
+   li  r9, XIVE_ESB_SET_PQ_01
+   ldcix   r0, r10, r9
+   sync
+
+   /* We have a possible subtle race here: The escalation interrupt might
+* have fired and be on its way to the host queue while we mask it,
+* and if we unmask it early enough (re-cede right away), there is
+* a theorical possibility that it fires again, thus landing in the
+* target queue more than once which is a big no-no.
+*
+* Fortunately, solving this is rather easy. If the above load setting
+* PQ to 01 returns a previous value where P is set, then we know the
+* escalation interrupt is somewhere on its way to the host. In that
+* case we simply don't clear the xive_esc_on flag below. It will be
+* eventually cleared by the handler for the escalation interrupt.
+*
+* Then, when doing a cede, we check that flag again before re-enabling
+* the escalation interrupt, and if set, we abort the cede.
+*/
+   andi.   r0, r0, XIVE_ESB_VAL_P
+   bne-1f
+
+   /* Now P is 0, we can clear the flag */
+   li  r0, 0
+   stb r0, VCPU_XIVE_ESC_ON(r4)
+1:
 no_xive:
 #endif /* CONFIG_KVM_XICS */
 
@@ -2755,7 +2790,32 @@ kvm_cede_prodded:
/* we've ceded but we want to give control to the host */
 kvm_cede_exit:
ld  r9, HSTATE_KVM_VCPU(r13)
-   b   guest_exit_cont
+#ifdef CONFIG_KVM_XICS
+   /* Abort if we still have a pending escalation */
+   lbz r5, VCPU_XIVE_ESC_ON(r9)
+   cmpwi   r5, 0
+   beq 1f
+   li  r0, 0
+   stb r0, VCPU_CEDED(r9)
+1: /* Enable XIVE escalation */
+   li  r5, XIVE_ESB_SET_PQ_00
+   mfmsr   r0
+   andi.   r0, r0, MSR_DR  /* in real mode? */
+   beq 1f
+   ld  r10, VCPU_XIVE_ESC_VADDR(r9)
+   cmpdi   r10, 0
+   beq 3f
+   ldx r0, r10, r5
+   b   2f
+1: ld  r10, VCPU_XIVE_ESC_RADDR(r9)
+   cmpdi   r10, 0
+   beq 3f
+   ldcix   r0, r10, r5
+2: sync
+   li  r0, 1
+   

[PATCH v4 4/6] powerpc/kvm/xive: Check DR not IR to chose real vs virt mode MMIOs

2018-01-11 Thread Benjamin Herrenschmidt
Signed-off-by: Benjamin Herrenschmidt 
---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 327f5e6a1e4d..506a1c775370 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1442,7 +1442,7 @@ guest_exit_cont:  /* r9 = vcpu, r12 = trap, r13 = 
paca */
li  r7, TM_SPC_PULL_OS_CTX
li  r6, TM_QW1_OS
mfmsr   r0
-   andi.   r0, r0, MSR_IR  /* in real mode? */
+   andi.   r0, r0, MSR_DR  /* in real mode? */
beq 2f
ld  r10, HSTATE_XIVE_TIMA_VIRT(r13)
cmpldi  cr0, r10, 0
-- 
2.14.3



[PATCH v4 3/6] powerpc/kvm/xive: Don't use existing "prodded" flag for xive escalations

2018-01-11 Thread Benjamin Herrenschmidt
The prodded flag is only cleared at the beginning of H_CEDE,
so every time we have an escalation, we will cause the *next*
H_CEDE to return immediately.

Instead use a dedicated "irq_pending" flag to indicate that
a guest interrupt is pending for the VCPU. We don't reuse the
existing exception bitmap as to avoid expensive atomic ops.

Signed-off-by: Benjamin Herrenschmidt 
---
 arch/powerpc/include/asm/kvm_host.h |  1 +
 arch/powerpc/kernel/asm-offsets.c   |  1 +
 arch/powerpc/kvm/book3s_hv.c|  2 +-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 10 ++
 arch/powerpc/kvm/book3s_xive.c  |  3 +--
 5 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 3aa5b577cd60..bfe51356af5e 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -709,6 +709,7 @@ struct kvm_vcpu_arch {
u8 ceded;
u8 prodded;
u8 doorbell_request;
+   u8 irq_pending; /* Used by XIVE to signal pending guest irqs */
u32 last_inst;
 
struct swait_queue_head *wqp;
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 6b958414b4e0..825089cf3e23 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -514,6 +514,7 @@ int main(void)
OFFSET(VCPU_PENDING_EXC, kvm_vcpu, arch.pending_exceptions);
OFFSET(VCPU_CEDED, kvm_vcpu, arch.ceded);
OFFSET(VCPU_PRODDED, kvm_vcpu, arch.prodded);
+   OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending);
OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request);
OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr);
OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 2d46037ce936..eafd722dce56 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -2987,7 +2987,7 @@ static inline bool xive_interrupt_pending(struct kvm_vcpu 
*vcpu)
 {
if (!xive_enabled())
return false;
-   return vcpu->arch.xive_saved_state.pipr <
+   return vcpu->arch.irq_pending || vcpu->arch.xive_saved_state.pipr <
vcpu->arch.xive_saved_state.cppr;
 }
 #else
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 2659844784b8..327f5e6a1e4d 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -1033,6 +1033,16 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
li  r9, 1
stw r9, VCPU_XIVE_PUSHED(r4)
eieio
+
+   /*
+* We clear the irq_pending flag. There is a small chance of a
+* race vs. the escalation interrupt happening on another
+* processor setting it again, but the only consequence is to
+* cause a spurrious wakeup on the next H_CEDE which is not an
+* issue.
+*/
+   li  r0,0
+   stb r0, VCPU_IRQ_PENDING(r4)
 no_xive:
 #endif /* CONFIG_KVM_XICS */
 
diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 87f814e81e7d..de78ac38f7ed 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -84,8 +84,7 @@ static irqreturn_t xive_esc_irq(int irq, void *data)
 {
struct kvm_vcpu *vcpu = data;
 
-   /* We use the existing H_PROD mechanism to wake up the target */
-   vcpu->arch.prodded = 1;
+   vcpu->arch.irq_pending = 1;
smp_mb();
if (vcpu->arch.ceded)
kvmppc_fast_vcpu_kick(vcpu);
-- 
2.14.3



[PATCH v4 1/6] powerpc/kvm/xive: Add more debugfs queues info

2018-01-11 Thread Benjamin Herrenschmidt
Add details about enabled queues and escalation interrupts

Signed-off-by: Benjamin Herrenschmidt 
---
 arch/powerpc/kvm/book3s_xive.c | 28 
 1 file changed, 28 insertions(+)

diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c
index 0d750d274c4e..cd21c891be9e 100644
--- a/arch/powerpc/kvm/book3s_xive.c
+++ b/arch/powerpc/kvm/book3s_xive.c
@@ -1795,6 +1795,7 @@ static int xive_debug_show(struct seq_file *m, void 
*private)
 
kvm_for_each_vcpu(i, vcpu, kvm) {
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
+   unsigned int i;
 
if (!xc)
continue;
@@ -1804,6 +1805,33 @@ static int xive_debug_show(struct seq_file *m, void 
*private)
   xc->server_num, xc->cppr, xc->hw_cppr,
   xc->mfrr, xc->pending,
   xc->stat_rm_h_xirr, xc->stat_vm_h_xirr);
+   for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+   struct xive_q *q = >queues[i];
+   u32 i0, i1, idx;
+
+   if (!q->qpage && !xc->esc_virq[i])
+   continue;
+
+   seq_printf(m, " [q%d]: ", i);
+
+   if (q->qpage) {
+   idx = q->idx;
+   i0 = be32_to_cpup(q->qpage + idx);
+   idx = (idx + 1) & q->msk;
+   i1 = be32_to_cpup(q->qpage + idx);
+   seq_printf(m, "T=%d %08x %08x... \n", 
q->toggle, i0, i1);
+   }
+   if (xc->esc_virq[i]) {
+   struct irq_data *d = 
irq_get_irq_data(xc->esc_virq[i]);
+   struct xive_irq_data *xd = 
irq_data_get_irq_handler_data(d);
+   u64 pq = xive_vm_esb_load(xd, XIVE_ESB_GET);
+   seq_printf(m, "E:%c%c I(%d:%llx:%llx)",
+  (pq & XIVE_ESB_VAL_P) ? 'P' : 'p',
+  (pq & XIVE_ESB_VAL_Q) ? 'Q' : 'q',
+  xc->esc_virq[i], pq, xd->eoi_page);
+   seq_printf(m, "\n");
+   }
+   }
 
t_rm_h_xirr += xc->stat_rm_h_xirr;
t_rm_h_ipoll += xc->stat_rm_h_ipoll;
-- 
2.14.3



[PATCH 3/5] powerpc: Reduce log level of "OPAL detected !" message

2018-01-11 Thread Benjamin Herrenschmidt
This message isn't terribly useful.

Signed-off-by: Benjamin Herrenschmidt 
---
 arch/powerpc/platforms/powernv/opal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/opal.c 
b/arch/powerpc/platforms/powernv/opal.c
index 041ddbd1fc57..2479e3396ed8 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -127,7 +127,7 @@ int __init early_init_dt_scan_opal(unsigned long node,
 
if (of_flat_dt_is_compatible(node, "ibm,opal-v3")) {
powerpc_firmware_features |= FW_FEATURE_OPAL;
-   pr_info("OPAL detected !\n");
+   pr_debug("OPAL detected !\n");
} else {
panic("OPAL != V3 detected, no longer supported.\n");
}
-- 
2.14.3



[PATCH 4/5] powerpc: Remove useless EXC_COMMON_HV

2018-01-11 Thread Benjamin Herrenschmidt
The only difference between EXC_COMMON_HV and EXC_COMMON is that the
former adds "2" to the trap number which is supposed to represent the
fact that this is an "HV" interrupt which uses HSRR0/1.

However KVM is the only one who cares and it has its own separate macros.

In fact, we only have one user of EXC_COMMON_HV and it's for an
unknown interrupt case. All the other ones are already using EXC_COMMON.

Signed-off-by: Benjamin Herrenschmidt 
---
 arch/powerpc/include/asm/head-64.h   | 7 +--
 arch/powerpc/kernel/exceptions-64s.S | 2 +-
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/include/asm/head-64.h 
b/arch/powerpc/include/asm/head-64.h
index fdcff76e9a25..0a663dfc28b5 100644
--- a/arch/powerpc/include/asm/head-64.h
+++ b/arch/powerpc/include/asm/head-64.h
@@ -178,7 +178,7 @@ end_##sname:
  * TRAMP_REAL_*   - real, unrelocated helpers (virt can call these)
  * TRAMP_VIRT_*   - virt, unreloc helpers (in practice, real can use)
  * TRAMP_KVM  - KVM handlers that get put into real, unrelocated
- * EXC_COMMON_*   - virt, relocated common handlers
+ * EXC_COMMON - virt, relocated common handlers
  *
  * The EXC handlers are given a name, and branch to name_common, or the
  * appropriate KVM or masking function. Vector handler verieties are as
@@ -211,7 +211,6 @@ end_##sname:
  * EXC_COMMON_BEGIN/END - used to open-code the handler
  * EXC_COMMON
  * EXC_COMMON_ASYNC
- * EXC_COMMON_HV
  *
  * TRAMP_REAL and TRAMP_VIRT can be used with BEGIN/END. KVM
  * and OOL handlers are implemented as types of TRAMP and TRAMP_VIRT handlers.
@@ -413,10 +412,6 @@ end_##sname:
EXC_COMMON_BEGIN(name); \
STD_EXCEPTION_COMMON_ASYNC(realvec, name, hdlr);\
 
-#define EXC_COMMON_HV(name, realvec, hdlr) \
-   EXC_COMMON_BEGIN(name); \
-   STD_EXCEPTION_COMMON(realvec + 0x2, name, hdlr);\
-
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_HEAD_64_H */
diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index e441b469dc8f..175891c6909c 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1318,7 +1318,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
b   .
 #endif
 
-EXC_COMMON_HV(denorm_common, 0x1500, unknown_exception)
+EXC_COMMON(denorm_common, 0x1500, unknown_exception)
 
 
 #ifdef CONFIG_CBE_RAS
-- 
2.14.3



[PATCH 2/5] powerpc: Remove DEBUG define in 64-bit early setup code

2018-01-11 Thread Benjamin Herrenschmidt
This statement causes some not very useful messages to always
be printed on the serial port at boot, even on quiet boots.

Signed-off-by: Benjamin Herrenschmidt 
---
 arch/powerpc/kernel/setup_64.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 8956a9856604..d3124c302146 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -10,8 +10,6 @@
  *  2 of the License, or (at your option) any later version.
  */
 
-#define DEBUG
-
 #include 
 #include 
 #include 
-- 
2.14.3



[PATCH 1/5] powerpc/xive: Remove incorrect debug code

2018-01-11 Thread Benjamin Herrenschmidt
The debug code dumps TM WORD2, but the TIMA isn't
byte accessible and the value isn't that useful to
know about, so take out the pr_devel statement.

Signed-off-by: Benjamin Herrenschmidt 
---
 arch/powerpc/sysdev/xive/common.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/arch/powerpc/sysdev/xive/common.c 
b/arch/powerpc/sysdev/xive/common.c
index a3b8d7d1316e..838ebdbfe4c5 100644
--- a/arch/powerpc/sysdev/xive/common.c
+++ b/arch/powerpc/sysdev/xive/common.c
@@ -1269,11 +1269,6 @@ static void xive_setup_cpu(void)
 {
struct xive_cpu *xc = __this_cpu_read(xive_cpu);
 
-   /* Debug: Dump the TM state */
-   pr_devel("CPU %d [HW 0x%02x] VT=%02x\n",
-   smp_processor_id(), hard_smp_processor_id(),
-   in_8(xive_tima + xive_tima_offset + TM_WORD2));
-
/* The backend might have additional things to do */
if (xive_ops->setup_cpu)
xive_ops->setup_cpu(smp_processor_id(), xc);
-- 
2.14.3



[PATCH 5/5] powerpc: Use the TRAP macro whenever comparing a trap number

2018-01-11 Thread Benjamin Herrenschmidt
Trap numbers can have extra bits at the bottom that need to
be filtered out. There are a few cases where we don't do that.

It's possible that we got lucky but better safe than sorry.

Signed-off-by: Benjamin Herrenschmidt 
---
 arch/powerpc/kernel/process.c | 2 +-
 arch/powerpc/kernel/traps.c   | 2 +-
 arch/powerpc/mm/fault.c   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 72be0c32e902..397d6e64e3df 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1409,7 +1409,7 @@ void show_regs(struct pt_regs * regs)
print_msr_bits(regs->msr);
pr_cont("  CR: %08lx  XER: %08lx\n", regs->ccr, regs->xer);
trap = TRAP(regs);
-   if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
+   if ((TRAP(regs) != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
pr_cont("CFAR: "REG" ", regs->orig_gpr3);
if (trap == 0x200 || trap == 0x300 || trap == 0x600)
 #if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index f3eb61be0d30..d61989be28e1 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -1564,7 +1564,7 @@ void facility_unavailable_exception(struct pt_regs *regs)
u8 status;
bool hv;
 
-   hv = (regs->trap == 0xf80);
+   hv = (TRAP(regs) == 0xf80);
if (hv)
value = mfspr(SPRN_HFSCR);
else
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 6e1e39035380..7b15fe2ac986 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -576,7 +576,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long 
address, int sig)
 
/* kernel has accessed a bad area */
 
-   switch (regs->trap) {
+   switch (TRAP(regs)) {
case 0x300:
case 0x380:
printk(KERN_ALERT "Unable to handle kernel paging request for "
-- 
2.14.3



[PATCH 06/11] signal/powerpc: Document conflicts with SI_USER and SIGFPE and SIGTRAP

2018-01-11 Thread Eric W. Biederman
Setting si_code to 0 results in a userspace seeing an si_code of 0.
This is the same si_code as SI_USER.  Posix and common sense requires
that SI_USER not be a signal specific si_code.  As such this use of 0
for the si_code is a pretty horribly broken ABI.

Further use of si_code == 0 guaranteed that copy_siginfo_to_user saw a
value of __SI_KILL and now sees a value of SIL_KILL with the result
that uid and pid fields are copied and which might copy the si_addr
field by accident but certainly not by design.  Making this a very
flakey implementation.

Utilizing FPE_FIXME and TRAP_FIXME, siginfo_layout() will now return
SIL_FAULT and the appropriate fields will be reliably copied.

Possible ABI fixes include:
- Send the signal without siginfo
- Don't generate a signal
- Possibly assign and use an appropriate si_code
- Don't handle cases which can't happen
Cc: Paul Mackerras 
Cc: Kumar Gala 
Cc: Michael Ellerman 
Cc: Benjamin Herrenschmidt 
Cc:  linuxppc-dev@lists.ozlabs.org
Ref: 9bad068c24d7 ("[PATCH] ppc32: support for e500 and 85xx")
Ref: 0ed70f6105ef ("PPC32: Provide proper siginfo information on various 
exceptions.")
History Tree: https://git.kernel.org/pub/scm/linux/kernel/git/tglx/history.git
Signed-off-by: "Eric W. Biederman" 
---
 arch/powerpc/include/uapi/asm/siginfo.h | 15 +++
 arch/powerpc/kernel/traps.c | 10 +-
 2 files changed, 20 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/uapi/asm/siginfo.h 
b/arch/powerpc/include/uapi/asm/siginfo.h
index 1a691141e49f..444ca6c9989a 100644
--- a/arch/powerpc/include/uapi/asm/siginfo.h
+++ b/arch/powerpc/include/uapi/asm/siginfo.h
@@ -18,4 +18,19 @@
 #undef NSIGTRAP
 #define NSIGTRAP   4
 
+/*
+ * SIGFPE si_codes
+ */
+#ifdef __KERNEL__
+#define FPE_FIXME  0   /* Broken dup of SI_USER */
+#endif /* __KERNEL__ */
+
+/*
+ * SIGTRAP si_codes
+ */
+#ifdef __KERNEL__
+#define TRAP_FIXME 0   /* Broken dup of SI_USER */
+#endif /* __KERNEL__ */
+
+
 #endif /* _ASM_POWERPC_SIGINFO_H */
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index f3eb61be0d30..f2e6e1838952 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -917,7 +917,7 @@ void unknown_exception(struct pt_regs *regs)
printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n",
   regs->nip, regs->msr, regs->trap);
 
-   _exception(SIGTRAP, regs, 0, 0);
+   _exception(SIGTRAP, regs, TRAP_FIXME, 0);
 
exception_exit(prev_state);
 }
@@ -939,7 +939,7 @@ void instruction_breakpoint_exception(struct pt_regs *regs)
 
 void RunModeException(struct pt_regs *regs)
 {
-   _exception(SIGTRAP, regs, 0, 0);
+   _exception(SIGTRAP, regs, TRAP_FIXME, 0);
 }
 
 void single_step_exception(struct pt_regs *regs)
@@ -978,7 +978,7 @@ static void emulate_single_step(struct pt_regs *regs)
 
 static inline int __parse_fpscr(unsigned long fpscr)
 {
-   int ret = 0;
+   int ret = FPE_FIXME;
 
/* Invalid operation */
if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX))
@@ -1929,7 +1929,7 @@ void SPEFloatingPointException(struct pt_regs *regs)
extern int do_spe_mathemu(struct pt_regs *regs);
unsigned long spefscr;
int fpexc_mode;
-   int code = 0;
+   int code = FPE_FIXME;
int err;
 
flush_spe_to_thread(current);
@@ -1998,7 +1998,7 @@ void SPEFloatingPointRoundException(struct pt_regs *regs)
printk(KERN_ERR "unrecognized spe instruction "
   "in %s at %lx\n", current->comm, regs->nip);
} else {
-   _exception(SIGFPE, regs, 0, regs->nip);
+   _exception(SIGFPE, regs, FPE_FIXME, regs->nip);
return;
}
 }
-- 
2.14.1



Re: [linux-next][qla2xxx][85caa95]kernel BUG at lib/list_debug.c:31!

2018-01-11 Thread Madhani, Himanshu

> On Jan 10, 2018, at 9:38 PM, Abdul Haleem  wrote:
> 
> On Tue, 2018-01-09 at 18:09 +, Madhani, Himanshu wrote:
>> Hello Abdul, 
>> 
>>> On Jan 9, 2018, at 7:54 AM, Bart Van Assche  wrote:
>>> 
>>> On Tue, 2018-01-09 at 14:44 +0530, Abdul Haleem wrote:
 Greeting's, 
 
 Linux next kernel panics on powerpc when module qla2xxx is load/unload.
 
 Machine Type: Power 8 PowerVM LPAR
 Kernel : 4.15.0-rc2-next-20171211
 gcc : version 4.8.5
 Test type: module load/unload few times
 
 Trace messages:
 ---
 qla2xxx [:00:00.0]-0005: : QLogic Fibre Channel HBA Driver: 
 10.00.00.03-k.
 qla2xxx [0106:a0:00.0]-001a: : MSI-X vector count: 32.
 qla2xxx [0106:a0:00.0]-001d: : Found an ISP2532 irq 505 iobase 
 0xaeb324e6.
 qla2xxx [0106:a0:00.0]-00c6:1: MSI-X: Failed to enable support with 32 
 vectors, using 16 vectors.
 qla2xxx [0106:a0:00.0]-00fb:1: QLogic QLE2562 - PCIe 2-port 8Gb FC Adapter.
 qla2xxx [0106:a0:00.0]-00fc:1: ISP2532: PCIe (5.0GT/s x8) @ 0106:a0:00.0 
 hdma- host#=1 fw=8.06.00 (90d5).
 qla2xxx [0106:a0:00.1]-001a: : MSI-X vector count: 32.
 qla2xxx [0106:a0:00.1]-001d: : Found an ISP2532 irq 506 iobase 
 0xa46f1774.
 qla2xxx [0106:a0:00.1]-00c6:2: MSI-X: Failed to enable support with 32 
 vectors, using 16 vectors.
 2xxx
 qla2xxx [0106:a0:00.1]-00fb:2: QLogic QLE2562 - PCIe 2-port 8Gb FC Adapter.
 qla2xxx [0106:a0:00.1]-00fc:2: ISP2532: PCIe (5.0GT/s x8) @ 0106:a0:00.1 
 hdma- host#=2 fw=8.06.00 (90d5).
 0:00.0]-500a:1: LOOP UP detected (8 Gbps). 
 qla2xxx [0106:a0:00.1]-500a:2: LOOP UP detected (8 Gbps).
 list_add double add: new=8d33e594, prev=8d33e594, 
 next=adef1df4.
 [ cut here ]
 kernel BUG at lib/list_debug.c:31! 
 Oops: Exception in kernel mode, sig: 5 [#1]
 LE SMP NR_CPUS=2048 NUMA pSeries 
 Dumping ftrace buffer: 
  (ftrace buffer empty)
 Modules linked in: qla2xxx(E) tg3(E) ibmveth(E) xt_CHECKSUM(E)
 iptable_mangle(E) ipt_MASQUERADE(E) nf_nat_masquerade_ipv4(E)
 iptable_nat(E) nf_nat_ipv4(E) nf_nat(E) nf_conntrack_ipv4(E)
 nf_defrag_ipv4(E) xt_conntrack(E) nf_conntrack(E) ipt_REJECT(E)
 nf_reject_ipv4(E) tun(E) bridge(E) stp(E) llc(E) kvm_pr(E) kvm(E)
 sctp_diag(E) sctp(E) libcrc32c(E) tcp_diag(E) udp_diag(E)
 ebtable_filter(E) ebtables(E) dccp_diag(E) ip6table_filter(E) dccp(E)
 ip6_tables(E) iptable_filter(E) inet_diag(E) unix_diag(E)
 af_packet_diag(E) netlink_diag(E) xts(E) sg(E) vmx_crypto(E)
 pseries_rng(E) nfsd(E) auth_rpcgss(E) nfs_acl(E) lockd(E) grace(E)
 sunrpc(E) binfmt_misc(E) ip_tables(E) ext4(E) mbcache(E) jbd2(E)
 fscrypto(E) sd_mod(E) ibmvscsi(E) scsi_transport_srp(E) nvme_fc(E)
 nvme_fabrics(E) nvme_core(E) scsi_transport_fc(E)
 ptp(E) pps_core(E) dm_mirror(E) dm_region_hash(E) dm_log(E) dm_mod(E)
 [last unloaded: qla2xxx]
 CPU: 7 PID: 22230 Comm: qla2xxx_1_dpc Tainted: GE
 4.15.0-rc2-next-20171211-autotest-autotest #1
 NIP:  c0511040 LR: c051103c CTR: 00655170
 REGS: 9b7356fa TRAP: 0700   Tainted: GE 
 (4.15.0-rc2-next-20171211-autotest-autotest)
 MSR:  80010282b033   CR: 
 2222  XER: 0009  
 CFAR: c0170594 SOFTE: 0 
 GPR00: c051103c c000fc293ac0 c10f1d00 0058 
 GPR04: c0028fcccdd0 c0028fce3798 8000374060b8  
 GPR08:  c0d435ec 00028ef9 2717 
 GPR12:  ce734980 c01215d8 c002886996c0 
 GPR16:  0020 c002813d83f8 0001 
 GPR20: 2000 2000 0002 c002813dc808 
 GPR24: 0003 0001 c0027f5a5c20 c002813dced0 
 GPR28: c0027f5a5d90 c0027f5a5d90 c0027f5a5c00 c002813dc7f8 
 NIP [c0511040] __list_add_valid+0x70/0xb0
 LR [c051103c] __list_add_valid+0x6c/0xb0
 Call Trace:
 [c000fc293ac0] [c051103c] __list_add_valid+0x6c/0xb0 
 (unreliable)
 [c000fc293b20] [d51f1a08] qla24xx_async_gnl+0x108/0x420 
 [qla2xxx]
 [c000fc293bc0] [d51e762c] qla2x00_do_work+0x18c/0x8c0 [qla2xxx]
 [c000fc293ce0] [d51e8180] qla2x00_relogin+0x420/0xff0 [qla2xxx]
 [c000fc293dc0] [c012172c] kthread+0x15c/0x1a0
 [c000fc293e30] [c000b4e8] ret_from_kernel_thread+0x5c/0x74
 Instruction dump:
 41de0018 38210060 3861 e8010010 7c0803a6 4e800020 3c62ffae 7d445378 
 38631748 7d254b78 4bc5f51d 6000 <0fe0> 3c62ffae 7cc43378 386316f8 
 ---[ end trace a41bc8bd434657f1 

Re: [PATCH 00/26] KVM: PPC: Book3S PR: Transaction memory support on PR KVM

2018-01-11 Thread Benjamin Herrenschmidt
On Thu, 2018-01-11 at 11:56 -0200, Gustavo Romero wrote:
> Hi Simon,
> 
> On 01/11/2018 08:11 AM, wei.guo.si...@gmail.com wrote:
> > From: Simon Guo 
> > 
> > In current days, many OS distributions have utilized transaction
> > memory functionality. In PowerPC, HV KVM supports TM. But PR KVM
> > does not.
> > 
> > The drive for the transaction memory support of PR KVM is the
> > openstack Continuous Integration testing - They runs a HV(hypervisor)
> > KVM(as level 1) and then run PR KVM(as level 2) on top of that.
> > 
> > This patch set add transaction memory support on PR KVM.
> 
> Is this correct to assume that this emulation mode will just kick in on P9
> with kernel TM workarounds and HV KVM will continue to be used on POWER8
> since HV KVM is supported on POWER8 hosts?

HV KVM is supported on POWER9. In fact it's PR KVM that isn't (at least
not yet and never will be in Radix mode at least).

Cheers,
Ben.

> 
> 
> Regards,
> Gustavo
> 
> > Test cases performed:
> > linux/tools/testing/selftests/powerpc/tm/tm-syscall
> > linux/tools/testing/selftests/powerpc/tm/tm-fork
> > linux/tools/testing/selftests/powerpc/tm/tm-vmx-unavail
> > linux/tools/testing/selftests/powerpc/tm/tm-tmspr
> > linux/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv
> > linux/tools/testing/selftests/powerpc/math/vsx_preempt
> > linux/tools/testing/selftests/powerpc/math/fpu_signal
> > linux/tools/testing/selftests/powerpc/math/vmx_preempt
> > linux/tools/testing/selftests/powerpc/math/fpu_syscall
> > linux/tools/testing/selftests/powerpc/math/vmx_syscall
> > linux/tools/testing/selftests/powerpc/math/fpu_preempt
> > linux/tools/testing/selftests/powerpc/math/vmx_signal
> > linux/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr
> > linux/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr
> > linux/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx
> > linux/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr
> > linux/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx
> > https://github.com/justdoitqd/publicFiles/blob/master/test_tbegin_pr.c
> > https://github.com/justdoitqd/publicFiles/blob/master/test_tabort.c
> > https://github.com/justdoitqd/publicFiles/blob/master/test_kvm_htm_cap.c
> > 
> > Simon Guo (25):
> >   KVM: PPC: Book3S PR: Move kvmppc_save_tm/kvmppc_restore_tm to separate
> > file
> >   KVM: PPC: Book3S PR: add new parameter (guest MSR) for
> > kvmppc_save_tm()/kvmppc_restore_tm()
> >   KVM: PPC: Book3S PR: turn on FP/VSX/VMX MSR bits in kvmppc_save_tm()
> >   KVM: PPC: Book3S PR: add C function wrapper for
> > _kvmppc_save/restore_tm()
> >   KVM: PPC: Book3S PR: In PR KVM suspends Transactional state when
> > inject an interrupt.
> >   KVM: PPC: Book3S PR: PR KVM pass through MSR TM/TS bits to shadow_msr.
> >   KVM: PPC: Book3S PR: add TEXASR related macros
> >   KVM: PPC: Book3S PR: Sync TM bits to shadow msr for problem state
> > guest
> >   KVM: PPC: Book3S PR: implement RFID TM behavior to suppress change
> > from S0 to N0
> >   KVM: PPC: Book3S PR: set MSR HV bit accordingly for PPC970 and others.
> >   KVM: PPC: Book3S PR: prevent TS bits change in kvmppc_interrupt_pr()
> >   powerpc: export symbol msr_check_and_set().
> >   KVM: PPC: Book3S PR: adds new
> > kvmppc_copyto_vcpu_tm/kvmppc_copyfrom_vcpu_tm API for PR KVM.
> >   KVM: PPC: Book3S PR: export tm_enable()/tm_disable/tm_abort() APIs
> >   KVM: PPC: Book3S PR: add kvmppc_save/restore_tm_sprs() APIs
> >   KVM: PPC: Book3S PR: add transaction memory save/restore skeleton for
> > PR KVM
> >   KVM: PPC: Book3S PR: add math support for PR KVM HTM
> >   KVM: PPC: Book3S PR: make mtspr/mfspr emulation behavior based on
> > active TM SPRs
> >   KVM: PPC: Book3S PR: always fail transaction in guest privilege state
> >   KVM: PPC: Book3S PR: enable NV reg restore for reading TM SPR at guest
> > privilege state
> >   KVM: PPC: Book3S PR: adds emulation for treclaim.
> >   KVM: PPC: Book3S PR: add emulation for trechkpt in PR KVM.
> >   KVM: PPC: Book3S PR: add emulation for tabort. for privilege guest
> >   KVM: PPC: Book3S PR: add guard code to prevent returning to guest with
> > PR=0 and Transactional state
> >   KVM: PPC: Book3S PR: enable HTM for PR KVM for KVM_CHECK_EXTENSION
> > ioctl
> > 
> >  arch/powerpc/include/asm/asm-prototypes.h   |  10 +
> >  arch/powerpc/include/asm/kvm_book3s.h   |   8 +
> >  arch/powerpc/include/asm/kvm_host.h |   3 +
> >  arch/powerpc/include/asm/reg.h  |  25 +-
> >  arch/powerpc/include/asm/tm.h   |   2 -
> >  arch/powerpc/include/uapi/asm/tm.h  |   2 +-
> >  arch/powerpc/kernel/process.c   |   1 +
> >  arch/powerpc/kernel/tm.S|  12 +
> >  arch/powerpc/kvm/Makefile   |   3 +
> >  arch/powerpc/kvm/book3s.h   |   1 +
> >  arch/powerpc/kvm/book3s_64_mmu.c|  11 +-
> >  arch/powerpc/kvm/book3s_emulate.c   | 

Re: [PATCH v6 2/2] cxl: read PHB indications from the device tree

2018-01-11 Thread Frederic Barrat



Le 11/01/2018 à 16:01, Philippe Bergheaud a écrit :

Configure the P9 XSL_DSNCTL register with PHB indications found
in the device tree, or else use legacy hard-coded values.

Signed-off-by: Philippe Bergheaud 



Acked-by: Frederic Barrat 

Thanks



---
Changelog:

v2: New patch. Use the new device tree property "ibm,phb-indications".

v3: No change.

v4: No functional change.
 Drop cosmetic fix in comment.

v5: get_phb_indications():
   - make static variables local to function.
   - return static variable values by arguments.

v6: get_phb_indications():
   - acquire a mutex before setting the phb indications.

This patch depends on the following skiboot patch:
   https://patchwork.ozlabs.org/patch/858324/
---
  drivers/misc/cxl/cxl.h|  2 +-
  drivers/misc/cxl/cxllib.c |  2 +-
  drivers/misc/cxl/pci.c| 50 ++-
  3 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index e46a4062904a..5a6e9a921c2b 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -1062,7 +1062,7 @@ int cxl_psl_purge(struct cxl_afu *afu);
  int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
  u32 *phb_index, u64 *capp_unit_id);
  int cxl_slot_is_switched(struct pci_dev *dev);
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg);
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg);
  u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9);

  void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx);
diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c
index dc9bc1807fdf..61f80d586279 100644
--- a/drivers/misc/cxl/cxllib.c
+++ b/drivers/misc/cxl/cxllib.c
@@ -99,7 +99,7 @@ int cxllib_get_xsl_config(struct pci_dev *dev, struct 
cxllib_xsl_config *cfg)
if (rc)
return rc;

-   rc = cxl_get_xsl9_dsnctl(capp_unit_id, >dsnctl);
+   rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, >dsnctl);
if (rc)
return rc;
if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 19969ee86d6f..150883d761f1 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -409,21 +409,61 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 
*chipid,
return 0;
  }

-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
+static DEFINE_MUTEX(indications_mutex);
+
+static int get_phb_indications(struct pci_dev *dev, u64* capiind, u64 *asnind,
+  u64 *nbwind)
+{
+   static u64 nbw, asn, capi = 0;
+   struct device_node *np;
+   const __be32 *prop;
+
+   if (!capi) {
+   mutex_lock(_mutex);
+   if (!capi) {
+   if (!(np = pnv_pci_get_phb_node(dev))) {
+   mutex_unlock(_mutex);
+   return -1;
+   }
+
+   prop = of_get_property(np, "ibm,phb-indications", NULL);
+   if (!prop) {
+   nbw = 0x0300UL; /* legacy values */
+   asn = 0x0400UL;
+   capi = 0x0200UL;
+   } else {
+   nbw = (u64)be32_to_cpu(prop[2]);
+   asn = (u64)be32_to_cpu(prop[1]);
+   capi = (u64)be32_to_cpu(prop[0]);
+   }
+   of_node_put(np);
+   }
+   mutex_unlock(_mutex);
+   }
+   *capiind = capi;
+   *asnind = asn;
+   *nbwind = nbw;
+   return 0;
+}
+
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg)
  {
u64 xsl_dsnctl;
+   u64 capiind, asnind, nbwind;

/*
 * CAPI Identifier bits [0:7]
 * bit 61:60 MSI bits --> 0
 * bit 59 TVT selector --> 0
 */
+   if (get_phb_indications(dev, , , ))
+   return -1;

/*
 * Tell XSL where to route data to.
 * The field chipid should match the PHB CAPI_CMPM register
 */
-   xsl_dsnctl = ((u64)0x2 << (63-7)); /* Bit 57 */
+   xsl_dsnctl = (capiind << (63-15)); /* Bit 57 */
xsl_dsnctl |= (capp_unit_id << (63-15));

/* nMMU_ID Defaults to: b’01001’*/
@@ -437,14 +477,14 @@ int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
 * nbwind=0x03, bits [57:58], must include capi indicator.
 * Not supported on P9 DD1.
 */
-   xsl_dsnctl |= ((u64)0x03 << (63-47));
+   xsl_dsnctl |= (nbwind << (63-55));

/*
 * Upper 16b address bits of ASB_Notify messages sent to the
 * system. Need to match the PHB’s ASN Compare/Mask Register.
 * Not 

Re: [PATCH v6 1/2] powerpc/powernv: Enable tunneled operations

2018-01-11 Thread Frederic Barrat



Le 11/01/2018 à 16:01, Philippe Bergheaud a écrit :

P9 supports PCI tunneled operations (atomics and as_notify). This
patch adds support for tunneled operations on powernv, with a new
API, to be called by device drivers:

pnv_pci_get_tunnel_ind()
Tell driver the 16-bit ASN indication used by kernel.

pnv_pci_set_tunnel_bar()
Tell kernel the Tunnel BAR Response address used by driver.
This function uses two new OPAL calls, as the PBCQ Tunnel BAR
register is configured by skiboot.

pnv_pci_get_as_notify_info()
Return the ASN info of the thread to be woken up.

Signed-off-by: Philippe Bergheaud 
---



Reviewed-by: Frederic Barrat 




Changelog:

v2: Do not set the ASN indication. Get it from the device tree.

v3: Make pnv_pci_get_phb_node() available when compiling without cxl.

v4: Add pnv_pci_get_as_notify_info().
 Rebase opal call numbers on skiboot 5.9.6.

v5: pnv_pci_get_tunnel_ind():
   - fix node reference count
 pnv_pci_get_as_notify_info():
   - fail if task == NULL
   - read pid from mm->context.id
   - explain that thread.tidr require CONFIG_PPC64

v6: pnv_pci_get_tunnel_ind():
   - check if radix is enabled, or else return an error
  pnv_pci_get_as_notify_info():
   - remove a capi-specific comment, irrelevant for pci

This patch depends on the following skiboot patches:
   https://patchwork.ozlabs.org/patch/858324/
   https://patchwork.ozlabs.org/patch/858325/
---
  arch/powerpc/include/asm/opal-api.h|   4 +-
  arch/powerpc/include/asm/opal.h|   2 +
  arch/powerpc/include/asm/pnv-pci.h |   5 ++
  arch/powerpc/platforms/powernv/opal-wrappers.S |   2 +
  arch/powerpc/platforms/powernv/pci-cxl.c   |   8 --
  arch/powerpc/platforms/powernv/pci.c   | 106 +
  6 files changed, 118 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index 233c7504b1f2..b901f4d9f009 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -201,7 +201,9 @@
  #define OPAL_SET_POWER_SHIFT_RATIO155
  #define OPAL_SENSOR_GROUP_CLEAR   156
  #define OPAL_PCI_SET_P2P  157
-#define OPAL_LAST  157
+#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR   159
+#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR   160
+#define OPAL_LAST  160

  /* Device tree flags */

diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 0c545f7fc77b..8705e422b893 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -198,6 +198,8 @@ int64_t opal_unregister_dump_region(uint32_t id);
  int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
  int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag);
  int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t 
pe_number);
+int64_t opal_pci_get_pbcq_tunnel_bar(uint64_t phb_id, uint64_t *addr);
+int64_t opal_pci_set_pbcq_tunnel_bar(uint64_t phb_id, uint64_t addr);
  int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
uint64_t msg_len);
  int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
diff --git a/arch/powerpc/include/asm/pnv-pci.h 
b/arch/powerpc/include/asm/pnv-pci.h
index 3e5cf251ad9a..c69de3276b5e 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -29,6 +29,11 @@ extern int pnv_pci_set_power_state(uint64_t id, uint8_t 
state,
  extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target,
   u64 desc);

+extern int pnv_pci_get_tunnel_ind(struct pci_dev *dev, uint64_t *ind);
+extern int pnv_pci_set_tunnel_bar(struct pci_dev *dev, uint64_t addr,
+ int enable);
+extern int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid,
+ u32 *pid, u32 *tid);
  int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode);
  int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
   unsigned int virq);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S 
b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 6f4b00a2ac46..5da790fb7fef 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -320,3 +320,5 @@ OPAL_CALL(opal_set_powercap,
OPAL_SET_POWERCAP);
  OPAL_CALL(opal_get_power_shift_ratio, OPAL_GET_POWER_SHIFT_RATIO);
  OPAL_CALL(opal_set_power_shift_ratio, OPAL_SET_POWER_SHIFT_RATIO);
  OPAL_CALL(opal_sensor_group_clear,OPAL_SENSOR_GROUP_CLEAR);
+OPAL_CALL(opal_pci_get_pbcq_tunnel_bar,
OPAL_PCI_GET_PBCQ_TUNNEL_BAR);

[PATCH 4/5] powerpc/4xx: Improve a size determination in two functions

2018-01-11 Thread SF Markus Elfring
From: Markus Elfring 
Date: Thu, 11 Jan 2018 18:32:33 +0100

Replace the specification of data structures by pointer dereferences
as the parameter for the operator "sizeof" to make the corresponding size
determination a bit safer according to the Linux coding style convention.

This issue was detected by using the Coccinelle software.

Signed-off-by: Markus Elfring 
---
 arch/powerpc/platforms/4xx/msi.c | 2 +-
 arch/powerpc/platforms/4xx/ocm.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/4xx/msi.c b/arch/powerpc/platforms/4xx/msi.c
index d16b81303cb0..f693cab07752 100644
--- a/arch/powerpc/platforms/4xx/msi.c
+++ b/arch/powerpc/platforms/4xx/msi.c
@@ -223,7 +223,7 @@ static int ppc4xx_msi_probe(struct platform_device *dev)
 
dev_dbg(>dev, "PCIE-MSI: Setting up MSI support...\n");
 
-   msi = kzalloc(sizeof(struct ppc4xx_msi), GFP_KERNEL);
+   msi = kzalloc(sizeof(*msi), GFP_KERNEL);
if (!msi)
return -ENOMEM;
 
diff --git a/arch/powerpc/platforms/4xx/ocm.c b/arch/powerpc/platforms/4xx/ocm.c
index 5a750d0ad446..11418f090bf4 100644
--- a/arch/powerpc/platforms/4xx/ocm.c
+++ b/arch/powerpc/platforms/4xx/ocm.c
@@ -335,7 +335,7 @@ void *ppc4xx_ocm_alloc(phys_addr_t *phys, int size, int 
align,
if (IS_ERR_VALUE(offset))
continue;
 
-   ocm_blk = kzalloc(sizeof(struct ocm_block), GFP_KERNEL);
+   ocm_blk = kzalloc(sizeof(*ocm_blk), GFP_KERNEL);
if (!ocm_blk) {
rh_free(ocm_reg->rh, offset);
break;
-- 
2.15.1



[PATCH 5/5] powerpc/4xx: Delete an unnecessary return statement in two functions

2018-01-11 Thread SF Markus Elfring
From: Markus Elfring 
Date: Thu, 11 Jan 2018 18:40:23 +0100

The script "checkpatch.pl" pointed information out like the following.

WARNING: void function return statements are not generally useful

Thus remove such a statement in the affected functions.

Signed-off-by: Markus Elfring 
---
 arch/powerpc/platforms/4xx/ocm.c | 2 --
 arch/powerpc/platforms/4xx/pci.c | 1 -
 2 files changed, 3 deletions(-)

diff --git a/arch/powerpc/platforms/4xx/ocm.c b/arch/powerpc/platforms/4xx/ocm.c
index 11418f090bf4..aee5f041ff14 100644
--- a/arch/powerpc/platforms/4xx/ocm.c
+++ b/arch/powerpc/platforms/4xx/ocm.c
@@ -226,8 +226,6 @@ static void __init ocm_init_node(int count, struct 
device_node *node)
INIT_LIST_HEAD(>c.list);
 
ocm->ready = 1;
-
-   return;
 }
 
 static int ocm_debugfs_show(struct seq_file *m, void *v)
diff --git a/arch/powerpc/platforms/4xx/pci.c b/arch/powerpc/platforms/4xx/pci.c
index 73e6b36bcd51..b73ea8f4be1f 100644
--- a/arch/powerpc/platforms/4xx/pci.c
+++ b/arch/powerpc/platforms/4xx/pci.c
@@ -1399,7 +1399,6 @@ static void __init ppc_476fpe_pciex_check_link(struct 
ppc4xx_pciex_port *port)
printk(KERN_WARNING "PCIE%d: Link up failed\n", port->index);
 
iounmap(mbase);
-   return;
 }
 
 static struct ppc4xx_pciex_hwops ppc_476fpe_pcie_hwops __initdata =
-- 
2.15.1



[PATCH 3/5] powerpc/4xx: Delete an error message for a failed memory allocation in three functions

2018-01-11 Thread SF Markus Elfring
From: Markus Elfring 
Date: Thu, 11 Jan 2018 18:28:54 +0100

Omit an extra message for a memory allocation failure in these functions.

This issue was detected by using the Coccinelle software.

Signed-off-by: Markus Elfring 
---
 arch/powerpc/platforms/4xx/msi.c | 5 ++---
 arch/powerpc/platforms/4xx/ocm.c | 5 +
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/platforms/4xx/msi.c b/arch/powerpc/platforms/4xx/msi.c
index d50417e23add..d16b81303cb0 100644
--- a/arch/powerpc/platforms/4xx/msi.c
+++ b/arch/powerpc/platforms/4xx/msi.c
@@ -224,10 +224,9 @@ static int ppc4xx_msi_probe(struct platform_device *dev)
dev_dbg(>dev, "PCIE-MSI: Setting up MSI support...\n");
 
msi = kzalloc(sizeof(struct ppc4xx_msi), GFP_KERNEL);
-   if (!msi) {
-   dev_err(>dev, "No memory for MSI structure\n");
+   if (!msi)
return -ENOMEM;
-   }
+
dev->dev.platform_data = msi;
 
/* Get MSI ranges */
diff --git a/arch/powerpc/platforms/4xx/ocm.c b/arch/powerpc/platforms/4xx/ocm.c
index 8cb601e8bed7..5a750d0ad446 100644
--- a/arch/powerpc/platforms/4xx/ocm.c
+++ b/arch/powerpc/platforms/4xx/ocm.c
@@ -337,7 +337,6 @@ void *ppc4xx_ocm_alloc(phys_addr_t *phys, int size, int 
align,
 
ocm_blk = kzalloc(sizeof(struct ocm_block), GFP_KERNEL);
if (!ocm_blk) {
-   printk(KERN_ERR "PPC4XX OCM: could not allocate ocm 
block");
rh_free(ocm_reg->rh, offset);
break;
}
@@ -391,10 +390,8 @@ static int __init ppc4xx_ocm_init(void)
return 0;
 
ocm_nodes = kzalloc((count * sizeof(struct ocm_info)), GFP_KERNEL);
-   if (!ocm_nodes) {
-   printk(KERN_ERR "PPC4XX OCM: failed to allocate OCM nodes!\n");
+   if (!ocm_nodes)
return -ENOMEM;
-   }
 
ocm_count = count;
count = 0;
-- 
2.15.1



[PATCH 2/5] powerpc/4xx: Use seq_putc() in ocm_debugfs_show()

2018-01-11 Thread SF Markus Elfring
From: Markus Elfring 
Date: Thu, 11 Jan 2018 18:10:02 +0100

A single character (line break) should be put into a sequence.
Thus use the corresponding function "seq_putc".

This issue was detected by using the Coccinelle software.

Signed-off-by: Markus Elfring 
---
 arch/powerpc/platforms/4xx/ocm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/4xx/ocm.c b/arch/powerpc/platforms/4xx/ocm.c
index 5bc1baf809d1..8cb601e8bed7 100644
--- a/arch/powerpc/platforms/4xx/ocm.c
+++ b/arch/powerpc/platforms/4xx/ocm.c
@@ -267,7 +267,7 @@ static int ocm_debugfs_show(struct seq_file *m, void *v)
blk->size, blk->owner);
}
 
-   seq_printf(m, "\n");
+   seq_putc(m, '\n');
}
 
return 0;
-- 
2.15.1



[PATCH 1/5] powerpc/4xx: Combine four seq_printf() calls into two in ocm_debugfs_show()

2018-01-11 Thread SF Markus Elfring
From: Markus Elfring 
Date: Thu, 11 Jan 2018 18:08:08 +0100

Some data were printed into a sequence by four separate function calls.
Print the same data by two single function calls instead.

This issue was detected by using the Coccinelle software.

Signed-off-by: Markus Elfring 
---
 arch/powerpc/platforms/4xx/ocm.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/platforms/4xx/ocm.c b/arch/powerpc/platforms/4xx/ocm.c
index 85d9e37f5ccb..5bc1baf809d1 100644
--- a/arch/powerpc/platforms/4xx/ocm.c
+++ b/arch/powerpc/platforms/4xx/ocm.c
@@ -245,9 +245,7 @@ static int ocm_debugfs_show(struct seq_file *m, void *v)
seq_printf(m, "PhysAddr : 0x%llx\n", ocm->phys);
seq_printf(m, "MemTotal : %d Bytes\n", ocm->memtotal);
seq_printf(m, "MemTotal(NC) : %d Bytes\n", ocm->nc.memtotal);
-   seq_printf(m, "MemTotal(C)  : %d Bytes\n", ocm->c.memtotal);
-
-   seq_printf(m, "\n");
+   seq_printf(m, "MemTotal(C)  : %d Bytes\n\n", ocm->c.memtotal);
 
seq_printf(m, "NC.PhysAddr  : 0x%llx\n", ocm->nc.phys);
seq_printf(m, "NC.VirtAddr  : 0x%p\n", ocm->nc.virt);
@@ -259,9 +257,7 @@ static int ocm_debugfs_show(struct seq_file *m, void *v)
blk->size, blk->owner);
}
 
-   seq_printf(m, "\n");
-
-   seq_printf(m, "C.PhysAddr   : 0x%llx\n", ocm->c.phys);
+   seq_printf(m, "\nC.PhysAddr   : 0x%llx\n", ocm->c.phys);
seq_printf(m, "C.VirtAddr   : 0x%p\n", ocm->c.virt);
seq_printf(m, "C.MemTotal   : %d Bytes\n", ocm->c.memtotal);
seq_printf(m, "C.MemFree: %d Bytes\n", ocm->c.memfree);
-- 
2.15.1



[PATCH 0/5] powerpc/platforms/4xx: Adjustments for six function implementations

2018-01-11 Thread SF Markus Elfring
From: Markus Elfring 
Date: Thu, 11 Jan 2018 18:48:28 +0100

A few update suggestions were taken into account
from static source code analysis.

Markus Elfring (5):
  Combine four seq_printf() calls into two in ocm_debugfs_show()
  Use seq_putc() in ocm_debugfs_show()
  Delete an error message for a failed memory allocation in three functions
  Improve a size determination in two functions
  Delete an unnecessary return statement in two functions

 arch/powerpc/platforms/4xx/msi.c |  7 +++
 arch/powerpc/platforms/4xx/ocm.c | 19 +--
 arch/powerpc/platforms/4xx/pci.c |  1 -
 3 files changed, 8 insertions(+), 19 deletions(-)

-- 
2.15.1



Re: [PATCH V7] cxl: Add support for ASB_Notify on POWER9

2018-01-11 Thread Frederic Barrat



Le 11/01/2018 à 09:55, Christophe Lombard a écrit :

The POWER9 core supports a new feature: ASB_Notify which requires the
support of the Special Purpose Register: TIDR.

The ASB_Notify command, generated by the AFU, will attempt to
wake-up the host thread identified by the particular LPID:PID:TID.

This patch assign a unique TIDR (thread id) for the current thread which
will be used in the process element entry.

Signed-off-by: Christophe Lombard 
Reviewed-by: Philippe Bergheaud 


Looks ok to me, thanks!

Acked-by: Frederic Barrat 



Changelog[v7]
  - Rebased to latest upstream.
  - Added boolean: "need to allocate a TIDR"
  - Released the mutex and mark the context as STARTED in case of error.

Changelog[v6]
  - Rebased to latest upstream.
  - Updated the ioctl interface.
  - Removed the updated ptrace.
  - Assigned a unique TIDR for the current thread at a lower level.

Changelog[v5]
  - Rebased to latest upstream.
  - Updated the ioctl interface.
  - Returned the tid in the ioctl structure.

Changelog[v4]
  - Rebased to latest upstream.
  - Updated the ioctl interface.
  - Removed the field tid in the context structure.

Changelog[v3]
  - Rebased to latest upstream.
  - Updated attr->tid field in cxllib_get_PE_attributes().

Changelog[v2]
  - Rebased to latest upstream.
  - Updated the ioctl interface.
  - Added a checking to allow updating the TIDR if a P9 chip is present.
---
  arch/powerpc/kernel/process.c |  1 +
  drivers/misc/cxl/context.c|  2 ++
  drivers/misc/cxl/cxl.h|  3 +++
  drivers/misc/cxl/cxllib.c |  3 ++-
  drivers/misc/cxl/file.c   | 15 +--
  drivers/misc/cxl/native.c | 13 -
  include/uapi/misc/cxl.h   | 10 ++
  7 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 72be0c3..1dc39dd 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1589,6 +1589,7 @@ int set_thread_tidr(struct task_struct *t)

return 0;
  }
+EXPORT_SYMBOL_GPL(set_thread_tidr);

  #endif /* CONFIG_PPC64 */

diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 12a41b2..7ff315a 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -45,6 +45,8 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu 
*afu, bool master)
ctx->pid = NULL; /* Set in start work ioctl */
	mutex_init(&ctx->mapping_lock);
ctx->mapping = NULL;
+   ctx->tidr = 0;
+   ctx->assign_tidr = false;

if (cxl_is_power8()) {
		spin_lock_init(&ctx->sste_lock);
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index e46a406..53149fb 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -630,6 +630,9 @@ struct cxl_context {
struct list_head extra_irq_contexts;

struct mm_struct *mm;
+
+   u16 tidr;
+   bool assign_tidr;
  };

  struct cxl_irq_info;
diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c
index dc9bc18..30ccba4 100644
--- a/drivers/misc/cxl/cxllib.c
+++ b/drivers/misc/cxl/cxllib.c
@@ -199,10 +199,11 @@ int cxllib_get_PE_attributes(struct task_struct *task,
 */
attr->pid = mm->context.id;
mmput(mm);
+   attr->tid = task->thread.tidr;
} else {
attr->pid = 0;
+   attr->tid = 0;
}
-   attr->tid = 0;
return 0;
  }
  EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 76c0b0c..93fd381 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -173,7 +173,7 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
 * flags are set it's invalid
 */
if (work.reserved1 || work.reserved2 || work.reserved3 ||
-   work.reserved4 || work.reserved5 || work.reserved6 ||
+   work.reserved4 || work.reserved5 ||
(work.flags & ~CXL_START_WORK_ALL)) {
rc = -EINVAL;
goto out;
@@ -186,12 +186,16 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
rc =  -EINVAL;
goto out;
}
+
if ((rc = afu_register_irqs(ctx, work.num_interrupts)))
goto out;

if (work.flags & CXL_START_WORK_AMR)
amr = work.amr & mfspr(SPRN_UAMOR);

+   if (work.flags & CXL_START_WORK_TID)
+   ctx->assign_tidr = true;
+
ctx->mmio_err_ff = !!(work.flags & CXL_START_WORK_ERR_FF);

/*
@@ -263,8 +267,15 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
goto out;
}

-   ctx->status = STARTED;
rc = 0;
+   if (work.flags & CXL_START_WORK_TID) {
+   work.tid = ctx->tidr;
		if (copy_to_user(uwork, &work, sizeof(work)))
+

Re: revamp vmem_altmap / dev_pagemap handling V3

2018-01-11 Thread Jerome Glisse
On Mon, Jan 08, 2018 at 12:26:46PM +0100, Christoph Hellwig wrote:
> Any chance to get this fully reviewed and picked up before the
> end of the merge window?

Sorry for taking so long to get to that, i looked at all the patches
and did not see anything obviously wrong and i like the cleanup so

Reviewed-by: Jérôme Glisse 


[PATCH v6 2/2] cxl: read PHB indications from the device tree

2018-01-11 Thread Philippe Bergheaud
Configure the P9 XSL_DSNCTL register with PHB indications found
in the device tree, or else use legacy hard-coded values.

Signed-off-by: Philippe Bergheaud 
---
Changelog:

v2: New patch. Use the new device tree property "ibm,phb-indications".

v3: No change.

v4: No functional change.
Drop cosmetic fix in comment.

v5: get_phb_indications():
  - make static variables local to function.
  - return static variable values by arguments.

v6: get_phb_indications():
  - acquire a mutex before setting the phb indications.

This patch depends on the following skiboot patch:
  https://patchwork.ozlabs.org/patch/858324/
---
 drivers/misc/cxl/cxl.h|  2 +-
 drivers/misc/cxl/cxllib.c |  2 +-
 drivers/misc/cxl/pci.c| 50 ++-
 3 files changed, 47 insertions(+), 7 deletions(-)

diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index e46a4062904a..5a6e9a921c2b 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -1062,7 +1062,7 @@ int cxl_psl_purge(struct cxl_afu *afu);
 int cxl_calc_capp_routing(struct pci_dev *dev, u64 *chipid,
  u32 *phb_index, u64 *capp_unit_id);
 int cxl_slot_is_switched(struct pci_dev *dev);
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg);
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg);
 u64 cxl_calculate_sr(bool master, bool kernel, bool real_mode, bool p9);
 
 void cxl_native_irq_dump_regs_psl9(struct cxl_context *ctx);
diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c
index dc9bc1807fdf..61f80d586279 100644
--- a/drivers/misc/cxl/cxllib.c
+++ b/drivers/misc/cxl/cxllib.c
@@ -99,7 +99,7 @@ int cxllib_get_xsl_config(struct pci_dev *dev, struct 
cxllib_xsl_config *cfg)
if (rc)
return rc;
 
-   rc = cxl_get_xsl9_dsnctl(capp_unit_id, &cfg->dsnctl);
+   rc = cxl_get_xsl9_dsnctl(dev, capp_unit_id, &cfg->dsnctl);
if (rc)
return rc;
if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c
index 19969ee86d6f..150883d761f1 100644
--- a/drivers/misc/cxl/pci.c
+++ b/drivers/misc/cxl/pci.c
@@ -409,21 +409,61 @@ int cxl_calc_capp_routing(struct pci_dev *dev, u64 
*chipid,
return 0;
 }
 
-int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
+static DEFINE_MUTEX(indications_mutex);
+
+static int get_phb_indications(struct pci_dev *dev, u64* capiind, u64 *asnind,
+  u64 *nbwind)
+{
+   static u64 nbw, asn, capi = 0;
+   struct device_node *np;
+   const __be32 *prop;
+
+   if (!capi) {
+   mutex_lock(&indications_mutex);
+   if (!capi) {
+   if (!(np = pnv_pci_get_phb_node(dev))) {
+   mutex_unlock(&indications_mutex);
+   return -1;
+   }
+
+   prop = of_get_property(np, "ibm,phb-indications", NULL);
+   if (!prop) {
+   nbw = 0x0300UL; /* legacy values */
+   asn = 0x0400UL;
+   capi = 0x0200UL;
+   } else {
+   nbw = (u64)be32_to_cpu(prop[2]);
+   asn = (u64)be32_to_cpu(prop[1]);
+   capi = (u64)be32_to_cpu(prop[0]);
+   }
+   of_node_put(np);
+   }
+   mutex_unlock(&indications_mutex);
+   }
+   *capiind = capi;
+   *asnind = asn;
+   *nbwind = nbw;
+   return 0;
+}
+
+int cxl_get_xsl9_dsnctl(struct pci_dev *dev, u64 capp_unit_id, u64 *reg)
 {
u64 xsl_dsnctl;
+   u64 capiind, asnind, nbwind;
 
/*
 * CAPI Identifier bits [0:7]
 * bit 61:60 MSI bits --> 0
 * bit 59 TVT selector --> 0
 */
+   if (get_phb_indications(dev, &capiind, &asnind, &nbwind))
+   return -1;
 
/*
 * Tell XSL where to route data to.
 * The field chipid should match the PHB CAPI_CMPM register
 */
-   xsl_dsnctl = ((u64)0x2 << (63-7)); /* Bit 57 */
+   xsl_dsnctl = (capiind << (63-15)); /* Bit 57 */
xsl_dsnctl |= (capp_unit_id << (63-15));
 
/* nMMU_ID Defaults to: b’01001’*/
@@ -437,14 +477,14 @@ int cxl_get_xsl9_dsnctl(u64 capp_unit_id, u64 *reg)
 * nbwind=0x03, bits [57:58], must include capi indicator.
 * Not supported on P9 DD1.
 */
-   xsl_dsnctl |= ((u64)0x03 << (63-47));
+   xsl_dsnctl |= (nbwind << (63-55));
 
/*
 * Upper 16b address bits of ASB_Notify messages sent to the
 * system. Need to match the PHB’s ASN Compare/Mask Register.
 * Not supported on P9 DD1.
 */
-   xsl_dsnctl |= ((u64)0x04 << (63-55));
+   xsl_dsnctl |= asnind;

[PATCH v6 1/2] powerpc/powernv: Enable tunneled operations

2018-01-11 Thread Philippe Bergheaud
P9 supports PCI tunneled operations (atomics and as_notify). This
patch adds support for tunneled operations on powernv, with a new
API, to be called by device drivers:

pnv_pci_get_tunnel_ind()
   Tell driver the 16-bit ASN indication used by kernel.

pnv_pci_set_tunnel_bar()
   Tell kernel the Tunnel BAR Response address used by driver.
   This function uses two new OPAL calls, as the PBCQ Tunnel BAR
   register is configured by skiboot.

pnv_pci_get_as_notify_info()
   Return the ASN info of the thread to be woken up.

Signed-off-by: Philippe Bergheaud 
---
Changelog:

v2: Do not set the ASN indication. Get it from the device tree.

v3: Make pnv_pci_get_phb_node() available when compiling without cxl.

v4: Add pnv_pci_get_as_notify_info().
Rebase opal call numbers on skiboot 5.9.6.

v5: pnv_pci_get_tunnel_ind():
  - fix node reference count
pnv_pci_get_as_notify_info():
  - fail if task == NULL
  - read pid from mm->context.id
  - explain that thread.tidr require CONFIG_PPC64

v6: pnv_pci_get_tunnel_ind():
  - check if radix is enabled, or else return an error
 pnv_pci_get_as_notify_info():
  - remove a capi-specific comment, irrelevant for pci

This patch depends on the following skiboot patches:
  https://patchwork.ozlabs.org/patch/858324/
  https://patchwork.ozlabs.org/patch/858325/
---
 arch/powerpc/include/asm/opal-api.h|   4 +-
 arch/powerpc/include/asm/opal.h|   2 +
 arch/powerpc/include/asm/pnv-pci.h |   5 ++
 arch/powerpc/platforms/powernv/opal-wrappers.S |   2 +
 arch/powerpc/platforms/powernv/pci-cxl.c   |   8 --
 arch/powerpc/platforms/powernv/pci.c   | 106 +
 6 files changed, 118 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/include/asm/opal-api.h 
b/arch/powerpc/include/asm/opal-api.h
index 233c7504b1f2..b901f4d9f009 100644
--- a/arch/powerpc/include/asm/opal-api.h
+++ b/arch/powerpc/include/asm/opal-api.h
@@ -201,7 +201,9 @@
 #define OPAL_SET_POWER_SHIFT_RATIO 155
 #define OPAL_SENSOR_GROUP_CLEAR156
 #define OPAL_PCI_SET_P2P   157
-#define OPAL_LAST  157
+#define OPAL_PCI_GET_PBCQ_TUNNEL_BAR   159
+#define OPAL_PCI_SET_PBCQ_TUNNEL_BAR   160
+#define OPAL_LAST  160
 
 /* Device tree flags */
 
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 0c545f7fc77b..8705e422b893 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -198,6 +198,8 @@ int64_t opal_unregister_dump_region(uint32_t id);
 int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
 int64_t opal_config_cpu_idle_state(uint64_t state, uint64_t flag);
 int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t 
pe_number);
+int64_t opal_pci_get_pbcq_tunnel_bar(uint64_t phb_id, uint64_t *addr);
+int64_t opal_pci_set_pbcq_tunnel_bar(uint64_t phb_id, uint64_t addr);
 int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
uint64_t msg_len);
 int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
diff --git a/arch/powerpc/include/asm/pnv-pci.h 
b/arch/powerpc/include/asm/pnv-pci.h
index 3e5cf251ad9a..c69de3276b5e 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -29,6 +29,11 @@ extern int pnv_pci_set_power_state(uint64_t id, uint8_t 
state,
 extern int pnv_pci_set_p2p(struct pci_dev *initiator, struct pci_dev *target,
   u64 desc);
 
+extern int pnv_pci_get_tunnel_ind(struct pci_dev *dev, uint64_t *ind);
+extern int pnv_pci_set_tunnel_bar(struct pci_dev *dev, uint64_t addr,
+ int enable);
+extern int pnv_pci_get_as_notify_info(struct task_struct *task, u32 *lpid,
+ u32 *pid, u32 *tid);
 int pnv_phb_to_cxl_mode(struct pci_dev *dev, uint64_t mode);
 int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
   unsigned int virq);
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S 
b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 6f4b00a2ac46..5da790fb7fef 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -320,3 +320,5 @@ OPAL_CALL(opal_set_powercap,
OPAL_SET_POWERCAP);
 OPAL_CALL(opal_get_power_shift_ratio,  OPAL_GET_POWER_SHIFT_RATIO);
 OPAL_CALL(opal_set_power_shift_ratio,  OPAL_SET_POWER_SHIFT_RATIO);
 OPAL_CALL(opal_sensor_group_clear, OPAL_SENSOR_GROUP_CLEAR);
+OPAL_CALL(opal_pci_get_pbcq_tunnel_bar,
OPAL_PCI_GET_PBCQ_TUNNEL_BAR);
+OPAL_CALL(opal_pci_set_pbcq_tunnel_bar,
OPAL_PCI_SET_PBCQ_TUNNEL_BAR);
diff --git a/arch/powerpc/platforms/powernv/pci-cxl.c 

Re: [PATCH 00/26] KVM: PPC: Book3S PR: Transaction memory support on PR KVM

2018-01-11 Thread Gustavo Romero
Hi Simon,

On 01/11/2018 08:11 AM, wei.guo.si...@gmail.com wrote:
> From: Simon Guo 
> 
> In current days, many OS distributions have utilized transaction
> memory functionality. In PowerPC, HV KVM supports TM. But PR KVM
> does not.
> 
> The drive for the transaction memory support of PR KVM is the
> openstack Continuous Integration testing - They runs a HV(hypervisor)
> KVM(as level 1) and then run PR KVM(as level 2) on top of that.
> 
> This patch set add transaction memory support on PR KVM.

Is this correct to assume that this emulation mode will just kick in on P9
with kernel TM workarounds and HV KVM will continue to be used on POWER8
since HV KVM is supported on POWER8 hosts?


Regards,
Gustavo

> Test cases performed:
> linux/tools/testing/selftests/powerpc/tm/tm-syscall
> linux/tools/testing/selftests/powerpc/tm/tm-fork
> linux/tools/testing/selftests/powerpc/tm/tm-vmx-unavail
> linux/tools/testing/selftests/powerpc/tm/tm-tmspr
> linux/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv
> linux/tools/testing/selftests/powerpc/math/vsx_preempt
> linux/tools/testing/selftests/powerpc/math/fpu_signal
> linux/tools/testing/selftests/powerpc/math/vmx_preempt
> linux/tools/testing/selftests/powerpc/math/fpu_syscall
> linux/tools/testing/selftests/powerpc/math/vmx_syscall
> linux/tools/testing/selftests/powerpc/math/fpu_preempt
> linux/tools/testing/selftests/powerpc/math/vmx_signal
> linux/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr
> linux/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr
> linux/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx
> linux/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr
> linux/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx
> https://github.com/justdoitqd/publicFiles/blob/master/test_tbegin_pr.c
> https://github.com/justdoitqd/publicFiles/blob/master/test_tabort.c
> https://github.com/justdoitqd/publicFiles/blob/master/test_kvm_htm_cap.c
> 
> Simon Guo (25):
>   KVM: PPC: Book3S PR: Move kvmppc_save_tm/kvmppc_restore_tm to separate
> file
>   KVM: PPC: Book3S PR: add new parameter (guest MSR) for
> kvmppc_save_tm()/kvmppc_restore_tm()
>   KVM: PPC: Book3S PR: turn on FP/VSX/VMX MSR bits in kvmppc_save_tm()
>   KVM: PPC: Book3S PR: add C function wrapper for
> _kvmppc_save/restore_tm()
>   KVM: PPC: Book3S PR: In PR KVM suspends Transactional state when
> inject an interrupt.
>   KVM: PPC: Book3S PR: PR KVM pass through MSR TM/TS bits to shadow_msr.
>   KVM: PPC: Book3S PR: add TEXASR related macros
>   KVM: PPC: Book3S PR: Sync TM bits to shadow msr for problem state
> guest
>   KVM: PPC: Book3S PR: implement RFID TM behavior to suppress change
> from S0 to N0
>   KVM: PPC: Book3S PR: set MSR HV bit accordingly for PPC970 and others.
>   KVM: PPC: Book3S PR: prevent TS bits change in kvmppc_interrupt_pr()
>   powerpc: export symbol msr_check_and_set().
>   KVM: PPC: Book3S PR: adds new
> kvmppc_copyto_vcpu_tm/kvmppc_copyfrom_vcpu_tm API for PR KVM.
>   KVM: PPC: Book3S PR: export tm_enable()/tm_disable/tm_abort() APIs
>   KVM: PPC: Book3S PR: add kvmppc_save/restore_tm_sprs() APIs
>   KVM: PPC: Book3S PR: add transaction memory save/restore skeleton for
> PR KVM
>   KVM: PPC: Book3S PR: add math support for PR KVM HTM
>   KVM: PPC: Book3S PR: make mtspr/mfspr emulation behavior based on
> active TM SPRs
>   KVM: PPC: Book3S PR: always fail transaction in guest privilege state
>   KVM: PPC: Book3S PR: enable NV reg restore for reading TM SPR at guest
> privilege state
>   KVM: PPC: Book3S PR: adds emulation for treclaim.
>   KVM: PPC: Book3S PR: add emulation for trechkpt in PR KVM.
>   KVM: PPC: Book3S PR: add emulation for tabort. for privilege guest
>   KVM: PPC: Book3S PR: add guard code to prevent returning to guest with
> PR=0 and Transactional state
>   KVM: PPC: Book3S PR: enable HTM for PR KVM for KVM_CHECK_EXTENSION
> ioctl
> 
>  arch/powerpc/include/asm/asm-prototypes.h   |  10 +
>  arch/powerpc/include/asm/kvm_book3s.h   |   8 +
>  arch/powerpc/include/asm/kvm_host.h |   3 +
>  arch/powerpc/include/asm/reg.h  |  25 +-
>  arch/powerpc/include/asm/tm.h   |   2 -
>  arch/powerpc/include/uapi/asm/tm.h  |   2 +-
>  arch/powerpc/kernel/process.c   |   1 +
>  arch/powerpc/kernel/tm.S|  12 +
>  arch/powerpc/kvm/Makefile   |   3 +
>  arch/powerpc/kvm/book3s.h   |   1 +
>  arch/powerpc/kvm/book3s_64_mmu.c|  11 +-
>  arch/powerpc/kvm/book3s_emulate.c   | 279 +++-
>  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 259 ++-
>  arch/powerpc/kvm/book3s_pr.c| 256 +--
>  arch/powerpc/kvm/book3s_segment.S   |  13 +
>  arch/powerpc/kvm/powerpc.c  |   3 +-
>  arch/powerpc/kvm/tm.S   | 379 
> 
>  

[PATCH 26/26] KVM: PPC: Book3S PR: enable HTM for PR KVM for KVM_CHECK_EXTENSION ioctl

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

With current patch set, PR KVM now supports HTM. So this patch turns it
on for PR KVM.

Tested with:
https://github.com/justdoitqd/publicFiles/blob/master/test_kvm_htm_cap.c

Signed-off-by: Simon Guo 
---
 arch/powerpc/kvm/powerpc.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 1915e86..0b431aa 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -643,8 +643,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
 #endif
case KVM_CAP_PPC_HTM:
-   r = hv_enabled &&
-   (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM_COMP);
+   r = (cur_cpu_spec->cpu_user_features2 & PPC_FEATURE2_HTM_COMP);
break;
default:
r = 0;
-- 
1.8.3.1



[PATCH 25/26] KVM: PPC: Book3S PR: Support TAR handling for PR KVM HTM.

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

Currently guest kernel doesn't handle TAR fac unavailable and it always
runs with TAR bit on. PR KVM will lazily enable TAR. TAR is not a
frequent-use reg and it is not included in SVCPU struct.

To make it work for transaction memory at PR KVM:
1). Flush/giveup TAR at kvmppc_save_tm_pr().
2) If we are receiving a TAR fac unavail exception inside a transaction,
the checkpointed TAR might be a TAR value from another process. So we need
treclaim the transaction, then load the desired TAR value into reg, and
perform trecheckpoint.
3) Load TAR facility at kvmppc_restore_tm_pr() when TM active.
The reason we always loads TAR when restoring TM is that:
If we don't do this way, when there is a TAR fac unavailable exception
during TM active:
case 1: it is the 1st TAR fac unavail exception after tbegin.
vcpu->arch.tar should be reloaded as checkpoint tar val.
case 2: it is the 2nd or later TAR fac unavail exception after tbegin.
vcpu->arch.tar_tm should be reloaded as checkpoint tar val.
There will be unnecessary difficulty to handle the above 2 cases.

at the end of emulating treclaim., the correct TAR val need to be loaded
into reg if FSCR_TAR bit is on.
at the beginning of emulating trechkpt., TAR needs to be flushed so that
the right tar val can be copy into tar_tm.

Tested with:
tools/testing/selftests/powerpc/tm/tm-tar
tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar (remove DSCR/PPR
related testing).

Signed-off-by: Simon Guo 
---
 arch/powerpc/include/asm/kvm_book3s.h |  1 +
 arch/powerpc/kvm/book3s_emulate.c |  4 
 arch/powerpc/kvm/book3s_pr.c  | 31 +--
 arch/powerpc/kvm/tm.S | 16 ++--
 4 files changed, 48 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index 8bd454c..6635506 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -259,6 +259,7 @@ extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
 void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu);
 void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu);
 void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu);
+void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
 #endif
 
 extern int kvm_irq_bypass;
diff --git a/arch/powerpc/kvm/book3s_emulate.c 
b/arch/powerpc/kvm/book3s_emulate.c
index 11d76be..52ae307 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -167,6 +167,9 @@ static void kvmppc_emulate_treclaim(struct kvm_vcpu *vcpu, 
int ra_val)
mtspr(SPRN_TFIAR, vcpu->arch.tfiar);
tm_disable();
preempt_enable();
+
+   if (vcpu->arch.shadow_fscr & FSCR_TAR)
+   mtspr(SPRN_TAR, vcpu->arch.tar);
 }
 
 static void kvmppc_emulate_trchkpt(struct kvm_vcpu *vcpu)
@@ -183,6 +186,7 @@ static void kvmppc_emulate_trchkpt(struct kvm_vcpu *vcpu)
 * copy.
 */
kvmppc_giveup_ext(vcpu, MSR_VSX);
+   kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
kvmppc_copyto_vcpu_tm(vcpu);
kvmppc_restore_tm_pr(vcpu);
preempt_enable();
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index cc568bc..9085524 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -56,7 +56,6 @@
 static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 ulong msr);
 static int kvmppc_load_ext(struct kvm_vcpu *vcpu, ulong msr);
-static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
 
 /* Some compatibility defines */
 #ifdef CONFIG_PPC_BOOK3S_32
@@ -306,6 +305,7 @@ void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu)
vcpu->arch.save_msr_tm |= (vcpu->arch.guest_owned_ext &
(MSR_FP | MSR_VEC | MSR_VSX));
 
+   kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
kvmppc_giveup_ext(vcpu, MSR_VSX);
 
preempt_disable();
@@ -320,8 +320,20 @@ void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu)
return;
}
 
+
preempt_disable();
_kvmppc_restore_tm_pr(vcpu, vcpu->arch.save_msr_tm);
+
+   if (!(vcpu->arch.shadow_fscr & FSCR_TAR)) {
+   /* always restore TAR in TM active state, since we don't
+* want to be confused at fac unavailable while TM active:
+* load vcpu->arch.tar or vcpu->arch.tar_tm as chkpt value?
+*/
+   current->thread.tar = mfspr(SPRN_TAR);
+   mtspr(SPRN_TAR, vcpu->arch.tar);
+   vcpu->arch.shadow_fscr |= FSCR_TAR;
+   }
+
preempt_enable();
 
if (vcpu->arch.save_msr_tm & MSR_VSX)
@@ -333,6 +345,7 @@ void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu)
if (vcpu->arch.save_msr_tm & MSR_FP)
kvmppc_load_ext(vcpu, MSR_FP);
}
+
 }
 #endif
 
@@ -828,7 +841,7 @@ void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong 

[PATCH 24/26] KVM: PPC: Book3S PR: add guard code to prevent returning to guest with PR=0 and Transactional state

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

Currently PR KVM doesn't support transaction memory at guest privilege
state.

This patch adds a check at setting guest msr, so that we can never return
to guest with PR=0 and TS=0b10. A tabort will be emulated to indicate
this and fail transaction immediately.

Signed-off-by: Simon Guo 
---
 arch/powerpc/include/uapi/asm/tm.h |  2 +-
 arch/powerpc/kvm/book3s.h  |  1 +
 arch/powerpc/kvm/book3s_emulate.c  |  2 +-
 arch/powerpc/kvm/book3s_pr.c   | 13 -
 4 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/uapi/asm/tm.h 
b/arch/powerpc/include/uapi/asm/tm.h
index e1bf0e2..e2947c9 100644
--- a/arch/powerpc/include/uapi/asm/tm.h
+++ b/arch/powerpc/include/uapi/asm/tm.h
@@ -13,7 +13,7 @@
 #define TM_CAUSE_TLBI  0xdc
 #define TM_CAUSE_FAC_UNAV  0xda
 #define TM_CAUSE_SYSCALL   0xd8
-#define TM_CAUSE_MISC  0xd6  /* future use */
+#define TM_CAUSE_PRIV_T0xd6
 #define TM_CAUSE_SIGNAL0xd4
 #define TM_CAUSE_ALIGNMENT 0xd2
 #define TM_CAUSE_EMULATE   0xd0
diff --git a/arch/powerpc/kvm/book3s.h b/arch/powerpc/kvm/book3s.h
index d2b3ec0..9beb57b 100644
--- a/arch/powerpc/kvm/book3s.h
+++ b/arch/powerpc/kvm/book3s.h
@@ -32,4 +32,5 @@ extern int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu,
 extern int kvmppc_book3s_init_pr(void);
 extern void kvmppc_book3s_exit_pr(void);
 
+extern void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val);
 #endif
diff --git a/arch/powerpc/kvm/book3s_emulate.c 
b/arch/powerpc/kvm/book3s_emulate.c
index 65eb236..11d76be 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -196,7 +196,7 @@ static void kvmppc_emulate_trchkpt(struct kvm_vcpu *vcpu)
 }
 
 /* emulate tabort. at guest privilege state */
-static void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val)
+void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val)
 {
/* currently we only emulate tabort. but no emulation of other
 * tabort variants since there is no kernel usage of them at
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index f65415b..cc568bc 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -461,12 +461,23 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu 
*vcpu)
 
 static void kvmppc_set_msr_pr(struct kvm_vcpu *vcpu, u64 msr)
 {
-   ulong old_msr = kvmppc_get_msr(vcpu);
+   ulong old_msr;
 
 #ifdef EXIT_DEBUG
printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
 #endif
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+   /* We should never target guest MSR to TS=10 && PR=0,
+* since we always fail transaction for guest privilege
+* state.
+*/
+   if (!(msr & MSR_PR) && MSR_TM_TRANSACTIONAL(msr))
+   kvmppc_emulate_tabort(vcpu,
+   TM_CAUSE_PRIV_T | TM_CAUSE_PERSISTENT);
+#endif
+
+   old_msr = kvmppc_get_msr(vcpu);
msr &= to_book3s(vcpu)->msr_mask;
kvmppc_set_msr_fast(vcpu, msr);
kvmppc_recalc_shadow_msr(vcpu);
-- 
1.8.3.1



[PATCH 23/26] KVM: PPC: Book3S PR: add emulation for tabort. for privilege guest

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

Currently privilege guest will be run with TM disabled.

Although the privilege guest cannot initiate a new transaction,
it can use tabort to terminate its problem state's transaction.
So it is still necessary to emulate tabort. for privilege guest.

This patch adds emulation for tabort. of privilege guest.

Tested with:
https://github.com/justdoitqd/publicFiles/blob/master/test_tabort.c

Signed-off-by: Simon Guo 
---
 arch/powerpc/include/asm/kvm_book3s.h |  1 +
 arch/powerpc/kvm/book3s_emulate.c | 31 +++
 arch/powerpc/kvm/book3s_pr.c  |  2 +-
 3 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index 524cd82..8bd454c 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -258,6 +258,7 @@ extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
 void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu);
 void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu);
 void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu);
+void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu);
 #endif
 
 extern int kvm_irq_bypass;
diff --git a/arch/powerpc/kvm/book3s_emulate.c 
b/arch/powerpc/kvm/book3s_emulate.c
index 52a2e46..65eb236 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -50,6 +50,7 @@
 #define OP_31_XOP_SLBMFEE  915
 
 #define OP_31_XOP_TBEGIN   654
+#define OP_31_XOP_TABORT   910
 
 #define OP_31_XOP_TRECLAIM 942
 #define OP_31_XOP_TRCHKPT  1006
@@ -193,6 +194,19 @@ static void kvmppc_emulate_trchkpt(struct kvm_vcpu *vcpu)
guest_msr |= MSR_TS_S;
kvmppc_set_msr(vcpu, guest_msr);
 }
+
+/* emulate tabort. at guest privilege state */
+static void kvmppc_emulate_tabort(struct kvm_vcpu *vcpu, int ra_val)
+{
+   /* currently we only emulate tabort. but no emulation of other
+* tabort variants since there is no kernel usage of them at
+* present.
+*/
+   tm_enable();
+   tm_abort(ra_val);
+   tm_disable();
+}
+
 #endif
 
 int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
@@ -459,6 +473,23 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
emulated = EMULATE_FAIL;
break;
}
+   case OP_31_XOP_TABORT:
+   {
+   ulong guest_msr = kvmppc_get_msr(vcpu);
+   unsigned long ra_val = 0;
+
+   /* only emulate for privilege guest, since problem state
+* guest can run with TM enabled and we don't expect to
+* trap at here for that case.
+*/
+   WARN_ON(guest_msr & MSR_PR);
+
+   if (ra)
+   ra_val = kvmppc_get_gpr(vcpu, ra);
+
+   kvmppc_emulate_tabort(vcpu, ra_val);
+   break;
+   }
case OP_31_XOP_TRECLAIM:
{
ulong guest_msr = kvmppc_get_msr(vcpu);
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 1d105fa..f65415b 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -246,7 +246,7 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
 }
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu)
+inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu)
 {
tm_enable();
vcpu->arch.tfhar = mfspr(SPRN_TFHAR);
-- 
1.8.3.1



[PATCH 22/26] KVM: PPC: Book3S PR: add emulation for trechkpt in PR KVM.

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

This patch adds host emulation when guest PR KVM executes "trechkpt.",
which is a privileged instruction and will trap into host.

We firstly copy vcpu ongoing content into vcpu tm checkpoint
content, then perform kvmppc_restore_tm_pr() to do trechkpt.
with updated vcpu tm checkpoint vals.

Signed-off-by: Simon Guo 
---
 arch/powerpc/kvm/book3s_emulate.c | 57 ++-
 1 file changed, 56 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_emulate.c 
b/arch/powerpc/kvm/book3s_emulate.c
index 51c0e20..52a2e46 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -52,6 +52,7 @@
 #define OP_31_XOP_TBEGIN   654
 
 #define OP_31_XOP_TRECLAIM 942
+#define OP_31_XOP_TRCHKPT  1006
 
 /* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */
 #define OP_31_XOP_DCBZ 1010
@@ -94,7 +95,7 @@ static bool spr_allowed(struct kvm_vcpu *vcpu, enum 
priv_level level)
 }
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-void kvmppc_copyto_vcpu_tm(struct kvm_vcpu *vcpu)
+static void kvmppc_copyto_vcpu_tm(struct kvm_vcpu *vcpu)
 {
memcpy(>arch.gpr_tm[0], >arch.gpr[0],
sizeof(vcpu->arch.gpr_tm));
@@ -166,6 +167,32 @@ static void kvmppc_emulate_treclaim(struct kvm_vcpu *vcpu, 
int ra_val)
tm_disable();
preempt_enable();
 }
+
+static void kvmppc_emulate_trchkpt(struct kvm_vcpu *vcpu)
+{
+   unsigned long guest_msr = kvmppc_get_msr(vcpu);
+
+   preempt_disable();
+   vcpu->arch.save_msr_tm = MSR_TS_S;
+   vcpu->arch.save_msr_tm &= ~(MSR_FP | MSR_VEC | MSR_VSX);
+   vcpu->arch.save_msr_tm |= (vcpu->arch.guest_owned_ext &
+   (MSR_FP | MSR_VEC | MSR_VSX));
+   /*
+* need flush FP/VEC/VSX to vcpu save area before
+* copy.
+*/
+   kvmppc_giveup_ext(vcpu, MSR_VSX);
+   kvmppc_copyto_vcpu_tm(vcpu);
+   kvmppc_restore_tm_pr(vcpu);
+   preempt_enable();
+
+   /*
+* as a result of trecheckpoint. set TS to suspended.
+*/
+   guest_msr &= ~(MSR_TS_MASK);
+   guest_msr |= MSR_TS_S;
+   kvmppc_set_msr(vcpu, guest_msr);
+}
 #endif
 
 int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
@@ -457,6 +484,34 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
kvmppc_emulate_treclaim(vcpu, ra_val);
break;
}
+   case OP_31_XOP_TRCHKPT:
+   {
+   ulong guest_msr = kvmppc_get_msr(vcpu);
+   unsigned long texasr;
+
+   /* generate interrupt based on priorities */
+   if (guest_msr & MSR_PR) {
+   /* Privileged Instruction type Program Intr */
+   kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+   emulated = EMULATE_AGAIN;
+   break;
+   }
+
+   tm_enable();
+   texasr = mfspr(SPRN_TEXASR);
+   tm_disable();
+
+   if (MSR_TM_ACTIVE(guest_msr) ||
+   !(texasr & (TEXASR_FS))) {
+   /* TM bad thing interrupt */
+   kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
+   emulated = EMULATE_AGAIN;
+   break;
+   }
+
+   kvmppc_emulate_trchkpt(vcpu);
+   break;
+   }
 #endif
default:
emulated = EMULATE_FAIL;
-- 
1.8.3.1



[PATCH 21/26] KVM: PPC: Book3S PR: adds emulation for treclaim.

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

This patch adds support for "treclaim." emulation when PR KVM guest
executes treclaim. and traps to host.

We first perform treclaim. to save the TM checkpoint. Then it is
necessary to update the vcpu's current register content with the
checkpointed values, so that when we rfid into the guest again,
that current register content (now the checkpoint values) will be
loaded into the registers.

Signed-off-by: Simon Guo 
---
 arch/powerpc/include/asm/reg.h|  4 +++
 arch/powerpc/kvm/book3s_emulate.c | 66 ++-
 2 files changed, 69 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 6c293bc..b3bcf6b 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -244,12 +244,16 @@
 #define SPRN_TEXASR0x82/* Transaction EXception & Summary */
 #define SPRN_TEXASRU   0x83/* ''  ''  ''Upper 32  */
 #define TEXASR_FC_LG   (63 - 7)/* Failure Code */
+#define TEXASR_AB_LG   (63 - 31)   /* Abort */
+#define TEXASR_SU_LG   (63 - 32)   /* Suspend */
 #define TEXASR_HV_LG   (63 - 34)   /* Hypervisor state*/
 #define TEXASR_PR_LG   (63 - 35)   /* Privilege level */
 #define TEXASR_FS_LG   (63 - 36)   /* failure summary */
 #define TEXASR_EX_LG   (63 - 37)   /* TFIAR exact bit */
 #define TEXASR_ROT_LG  (63 - 38)   /* ROT bit */
 #define TEXASR_FC  (ASM_CONST(0xFF) << TEXASR_FC_LG)
+#define TEXASR_AB  __MASK(TEXASR_AB_LG)
+#define TEXASR_SU  __MASK(TEXASR_SU_LG)
 #define TEXASR_HV  __MASK(TEXASR_HV_LG)
 #define TEXASR_PR  __MASK(TEXASR_PR_LG)
 #define TEXASR_FS  __MASK(TEXASR_FS_LG)
diff --git a/arch/powerpc/kvm/book3s_emulate.c 
b/arch/powerpc/kvm/book3s_emulate.c
index 1eb1900..51c0e20 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -25,6 +25,7 @@
 #include 
 #include 
 #include "book3s.h"
+#include 
 
 #define OP_19_XOP_RFID 18
 #define OP_19_XOP_RFI  50
@@ -50,6 +51,8 @@
 
 #define OP_31_XOP_TBEGIN   654
 
+#define OP_31_XOP_TRECLAIM 942
+
 /* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */
 #define OP_31_XOP_DCBZ 1010
 
@@ -109,7 +112,7 @@ void kvmppc_copyto_vcpu_tm(struct kvm_vcpu *vcpu)
vcpu->arch.vrsave_tm = vcpu->arch.vrsave;
 }
 
-void kvmppc_copyfrom_vcpu_tm(struct kvm_vcpu *vcpu)
+static void kvmppc_copyfrom_vcpu_tm(struct kvm_vcpu *vcpu)
 {
memcpy(>arch.gpr[0], >arch.gpr_tm[0],
sizeof(vcpu->arch.gpr));
@@ -127,6 +130,42 @@ void kvmppc_copyfrom_vcpu_tm(struct kvm_vcpu *vcpu)
vcpu->arch.vrsave = vcpu->arch.vrsave_tm;
 }
 
+static void kvmppc_emulate_treclaim(struct kvm_vcpu *vcpu, int ra_val)
+{
+   unsigned long guest_msr = kvmppc_get_msr(vcpu);
+   int fc_val = ra_val ? ra_val : 1;
+
+   kvmppc_save_tm_pr(vcpu);
+
+   preempt_disable();
+   kvmppc_copyfrom_vcpu_tm(vcpu);
+   preempt_enable();
+
+   /*
+* treclaim need quit to non-transactional state.
+*/
+   guest_msr &= ~(MSR_TS_MASK);
+   kvmppc_set_msr(vcpu, guest_msr);
+
+   preempt_disable();
+   tm_enable();
+   vcpu->arch.texasr = mfspr(SPRN_TEXASR);
+   vcpu->arch.texasr &= ~TEXASR_FC;
+   vcpu->arch.texasr |= ((u64)fc_val << TEXASR_FC_LG);
+
+   vcpu->arch.texasr &= ~(TEXASR_PR | TEXASR_HV);
+   if (kvmppc_get_msr(vcpu) & MSR_PR)
+   vcpu->arch.texasr |= TEXASR_PR;
+
+   if (kvmppc_get_msr(vcpu) & MSR_HV)
+   vcpu->arch.texasr |= TEXASR_HV;
+
+   vcpu->arch.tfiar = kvmppc_get_pc(vcpu);
+   mtspr(SPRN_TEXASR, vcpu->arch.texasr);
+   mtspr(SPRN_TFIAR, vcpu->arch.tfiar);
+   tm_disable();
+   preempt_enable();
+}
 #endif
 
 int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
@@ -393,6 +432,31 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
emulated = EMULATE_FAIL;
break;
}
+   case OP_31_XOP_TRECLAIM:
+   {
+   ulong guest_msr = kvmppc_get_msr(vcpu);
+   unsigned long ra_val = 0;
+
+   /* generate interrupt based on priorities */
+   if (guest_msr & MSR_PR) {
+   /* Privileged Instruction type Program 
Interrupt */
+   kvmppc_core_queue_program(vcpu, SRR1_PROGPRIV);
+   emulated = EMULATE_AGAIN;
+   break;
+   }
+
+   if (!MSR_TM_SUSPENDED(guest_msr)) {
+   /* TM bad thing interrupt */
+   kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
+   emulated = EMULATE_AGAIN;
+   

[PATCH 20/26] KVM: PPC: Book3S PR: enable NV reg restore for reading TM SPR at guest privilege state

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

Currently kvmppc_handle_fac() does not update NV GPRs and thus it can
only return with RESUME_GUEST.

However PR KVM guest always disables MSR_TM bit at privilege state. If PR
privilege guest are trying to read TM SPRs, it will trigger TM facility
unavailable exception and fall into kvmppc_handle_fac(). Then the emulation
will be done by kvmppc_core_emulate_mfspr_pr(). The mfspr instruction can
include a RT with NV reg. So it is necessary to restore NV GPRs at this
case, to reflect the update to NV RT.

This patch make kvmppc_handle_fac() return GUEST_RESUME_NV at TM fac
exception and with guest privilege state.

Signed-off-by: Simon Guo 
---
 arch/powerpc/kvm/book3s_pr.c | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index a26f4db..1d105fa 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -1030,6 +1030,18 @@ static int kvmppc_handle_fac(struct kvm_vcpu *vcpu, 
ulong fac)
break;
}
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+   /* Since we disabled MSR_TM at privilege state, the mfspr instruction
+* for TM spr can trigger TM fac unavailable. In this case, the
+* emulation is handled by kvmppc_emulate_fac(), which invokes
+* kvmppc_emulate_mfspr() finally. But note the mfspr can include
+* RT for NV registers. So it need to restore those NV reg to reflect
+* the update.
+*/
+   if ((fac == FSCR_TM_LG) && !(kvmppc_get_msr(vcpu) & MSR_PR))
+   return RESUME_GUEST_NV;
+#endif
+
return RESUME_GUEST;
 }
 
@@ -1416,8 +1428,7 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
}
 #ifdef CONFIG_PPC_BOOK3S_64
case BOOK3S_INTERRUPT_FAC_UNAVAIL:
-   kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56);
-   r = RESUME_GUEST;
+   r = kvmppc_handle_fac(vcpu, vcpu->arch.shadow_fscr >> 56);
break;
 #endif
case BOOK3S_INTERRUPT_MACHINE_CHECK:
-- 
1.8.3.1



[PATCH 19/26] KVM: PPC: Book3S PR: always fail transaction in guest privilege state

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

Currently the kernel does not use transactional memory.
However, there is an issue for a privileged guest:
tbegin/tsuspend/tresume/tabort TM instructions can impact MSR TM bits
without trap into PR host. So following code will lead to a false mfmsr
result:
tbegin  <- MSR bits update to Transaction active.
beq <- failover handler branch
mfmsr   <- still read MSR bits from magic page with
transaction inactive.

It is not an issue for non-privilege guest since its mfmsr is not patched
with magic page and will always trap into PR host.

This patch will always fail tbegin attempt for privilege guest, so that
the above issue is prevented. It is benign since currently (guest) kernel
doesn't initiate a transaction.

Test case:
https://github.com/justdoitqd/publicFiles/blob/master/test_tbegin_pr.c

Signed-off-by: Simon Guo 
---
 arch/powerpc/include/asm/kvm_book3s.h |  1 +
 arch/powerpc/kvm/book3s_emulate.c | 34 ++
 arch/powerpc/kvm/book3s_pr.c  | 11 ++-
 3 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index d8dbfa5..524cd82 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -257,6 +257,7 @@ extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu);
 void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu);
+void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu);
 #endif
 
 extern int kvm_irq_bypass;
diff --git a/arch/powerpc/kvm/book3s_emulate.c 
b/arch/powerpc/kvm/book3s_emulate.c
index c2836330..1eb1900 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 #include "book3s.h"
 
 #define OP_19_XOP_RFID 18
@@ -47,6 +48,8 @@
 #define OP_31_XOP_EIOIO854
 #define OP_31_XOP_SLBMFEE  915
 
+#define OP_31_XOP_TBEGIN   654
+
 /* DCBZ is actually 1014, but we patch it to 1010 so we get a trap */
 #define OP_31_XOP_DCBZ 1010
 
@@ -360,6 +363,37 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
 
break;
}
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+   case OP_31_XOP_TBEGIN:
+   {
+   if (!(kvmppc_get_msr(vcpu) & MSR_PR)) {
+   preempt_disable();
+   vcpu->arch.cr = (CR0_TBEGIN_FAILURE |
+ (vcpu->arch.cr & ~(CR0_MASK << CR0_SHIFT)));
+
+   vcpu->arch.texasr = (TEXASR_FS | TEXASR_EX |
+   (((u64)(TM_CAUSE_EMULATE | 
TM_CAUSE_PERSISTENT))
+<< TEXASR_FC_LG));
+
+   if ((inst >> 21) & 0x1)
+   vcpu->arch.texasr |= TEXASR_ROT;
+
+   if (kvmppc_get_msr(vcpu) & MSR_PR)
+   vcpu->arch.texasr |= TEXASR_PR;
+
+   if (kvmppc_get_msr(vcpu) & MSR_HV)
+   vcpu->arch.texasr |= TEXASR_HV;
+
+   vcpu->arch.tfhar = kvmppc_get_pc(vcpu) + 4;
+   vcpu->arch.tfiar = kvmppc_get_pc(vcpu);
+
+   kvmppc_restore_tm_sprs(vcpu);
+   preempt_enable();
+   } else
+   emulated = EMULATE_FAIL;
+   break;
+   }
+#endif
default:
emulated = EMULATE_FAIL;
}
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index c35bd02..a26f4db 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -255,7 +255,7 @@ static inline void kvmppc_save_tm_sprs(struct kvm_vcpu 
*vcpu)
tm_disable();
 }
 
-static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu)
+inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu)
 {
tm_enable();
mtspr(SPRN_TFHAR, vcpu->arch.tfhar);
@@ -447,6 +447,15 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
(PVR_VER(guest_pvr) == PVR_970GX))
smsr |= MSR_HV;
 #endif
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+   /*
+* in guest privileged state, we want to fail all TM transactions.
+* So disable MSR TM bit so that all tbegin. will be able to be
+* trapped into host.
+*/
+   if (!(guest_msr & MSR_PR))
+   smsr &= ~MSR_TM;
+#endif
vcpu->arch.shadow_msr = smsr;
 }
 
-- 
1.8.3.1



[PATCH 18/26] KVM: PPC: Book3S PR: make mtspr/mfspr emulation behavior based on active TM SPRs

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

The mfspr/mtspr instructions on TM SPRs (TEXASR/TFIAR/TFHAR) are
non-privileged and can be executed by a PR KVM guest in problem state
without trapping into the host. We therefore only emulate mtspr/mfspr
for TEXASR/TFIAR/TFHAR in the guest PR=0 state.

When we are emulating mtspr tm sprs at guest PR=0 state, the emulation
result need to be visible to guest PR=1 state. That is, the actual TM
SPR val should be loaded into actual registers.

We already flush TM SPRs into vcpu when switching out of CPU, and load
TM SPRs when switching back.

This patch corrects mfspr()/mtspr() emulation for TM SPRs to make the
actual source/dest based on actual TM SPRs.

Signed-off-by: Simon Guo 
---
 arch/powerpc/kvm/book3s_emulate.c | 35 +++
 1 file changed, 27 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_emulate.c 
b/arch/powerpc/kvm/book3s_emulate.c
index e096d01..c2836330 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -521,13 +521,26 @@ int kvmppc_core_emulate_mtspr_pr(struct kvm_vcpu *vcpu, 
int sprn, ulong spr_val)
break;
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
case SPRN_TFHAR:
-   vcpu->arch.tfhar = spr_val;
-   break;
case SPRN_TEXASR:
-   vcpu->arch.texasr = spr_val;
-   break;
case SPRN_TFIAR:
-   vcpu->arch.tfiar = spr_val;
+   if (MSR_TM_ACTIVE(kvmppc_get_msr(vcpu))) {
+   /* it is illegal to mtspr() TM regs in
+* other than non-transactional state.
+*/
+   kvmppc_core_queue_program(vcpu, SRR1_PROGTM);
+   emulated = EMULATE_AGAIN;
+   break;
+   }
+
+   tm_enable();
+   if (sprn == SPRN_TFHAR)
+   mtspr(SPRN_TFHAR, spr_val);
+   else if (sprn == SPRN_TEXASR)
+   mtspr(SPRN_TEXASR, spr_val);
+   else
+   mtspr(SPRN_TFIAR, spr_val);
+   tm_disable();
+
break;
 #endif
 #endif
@@ -674,13 +687,19 @@ int kvmppc_core_emulate_mfspr_pr(struct kvm_vcpu *vcpu, 
int sprn, ulong *spr_val
break;
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
case SPRN_TFHAR:
-   *spr_val = vcpu->arch.tfhar;
+   tm_enable();
+   *spr_val = mfspr(SPRN_TFHAR);
+   tm_disable();
break;
case SPRN_TEXASR:
-   *spr_val = vcpu->arch.texasr;
+   tm_enable();
+   *spr_val = mfspr(SPRN_TEXASR);
+   tm_disable();
break;
case SPRN_TFIAR:
-   *spr_val = vcpu->arch.tfiar;
+   tm_enable();
+   *spr_val = mfspr(SPRN_TFIAR);
+   tm_disable();
break;
 #endif
 #endif
-- 
1.8.3.1



[PATCH 17/26] KVM: PPC: Book3S PR: add math support for PR KVM HTM

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

The math registers will be saved into vcpu->arch.fp/vr and corresponding
vcpu->arch.fp_tm/vr_tm area.

We flush or giveup the math regs into vcpu->arch.fp/vr before saving
transaction. After transaction is restored, the math regs will be loaded
back into regs.

If there is a FP/VEC/VSX unavailable exception during transaction active
state, the math checkpoint content might be incorrect and we need to do
treclaim./load the correct checkpoint val/trechkpt. sequence to retry the
transaction.

If transaction is active, and the qemu process is switching out of CPU,
we need to keep the "guest_owned_ext" bits unchanged after qemu process
is switched back. The reason is that if we allow guest_owned_ext change
freely during a transaction, there will lack information to handle
FP/VEC/VSX unavailable exception during transaction active state.

Detail is as follows:
Assume we allow math bits to be given up freely during transaction:
- If it is the first FP unavailable exception after tbegin., vcpu->arch.fp/
vr need to be loaded for trechkpt.
- If it is the 2nd or subsequent FP unavailable exception after tbegin.,
vcpu->arch.fp_tm/vr_tm need to be loaded for trechkpt.
It will bring much additional complexity to cover both cases.

That is why we always save guest_owned_ext into vcpu->arch.save_msr_tm at
kvmppc_save_tm_pr(), then check those bits in vcpu->arch.save_msr_tm at
kvmppc_restore_tm_pr() to determine what math contents will be loaded.
With this, we will always load vcpu->arch.fp/vr in math unavailable
exception during active transaction.

Signed-off-by: Simon Guo 
---
 arch/powerpc/include/asm/kvm_host.h |   4 +-
 arch/powerpc/kvm/book3s_pr.c| 114 +---
 2 files changed, 95 insertions(+), 23 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index eb3b821..1124c62 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -627,7 +627,9 @@ struct kvm_vcpu_arch {
struct thread_vr_state vr_tm;
u32 vrsave_tm; /* also USPRG0 */
 
-   u64 save_msr_tm; /* TS bits: whether TM restore is required */
+   u64 save_msr_tm; /* TS bits: whether TM restore is required
+ * FP/VEC/VSX bits: saved guest_owned_ext
+ */
 #endif
 
 #ifdef CONFIG_KVM_EXIT_TIMING
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index eef0928..c35bd02 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -55,6 +55,7 @@
 
 static int kvmppc_handle_ext(struct kvm_vcpu *vcpu, unsigned int exit_nr,
 ulong msr);
+static int kvmppc_load_ext(struct kvm_vcpu *vcpu, ulong msr);
 static void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac);
 
 /* Some compatibility defines */
@@ -280,6 +281,33 @@ void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu)
return;
}
 
+   /* when we are in transaction active state and switch out of CPU,
+* we need to be careful to not "change" guest_owned_ext bits after
+* kvmppc_save_tm_pr()/kvmppc_restore_tm_pr() pair. The reason is
+* that we need to distinguish following 2 FP/VEC/VSX unavailable
+* exception cases in TM active state:
+* 1) tbegin. is executed with guest_owned_ext FP/VEC/VSX off. Then
+* there comes a FP/VEC/VSX unavailable exception during transaction.
+* In this case, the vcpu->arch.fp/vr contents need to be loaded as
+* checkpoint contents.
+* 2) tbegin. is executed with guest_owned_ext FP/VEC/VSX on. Then
+* there is task switch during suspended state. If we giveup ext and
+* update guest_owned_ext as no FP/VEC/VSX bits during context switch,
+* we need to load vcpu->arch.fp_tm/vr_tm contents as checkpoint
+* content.
+*
+* As a result, we don't change guest_owned_ext bits during
+* kvmppc_save/restore_tm_pr() pair. So that we can only use
+* vcpu->arch.fp/vr contents as checkpoint contents.
+* And we need to "save" the guest_owned_ext bits here who indicates
+* which math bits need to be "restored" in kvmppc_restore_tm_pr().
+*/
+   vcpu->arch.save_msr_tm &= ~(MSR_FP | MSR_VEC | MSR_VSX);
+   vcpu->arch.save_msr_tm |= (vcpu->arch.guest_owned_ext &
+   (MSR_FP | MSR_VEC | MSR_VSX));
+
+   kvmppc_giveup_ext(vcpu, MSR_VSX);
+
preempt_disable();
_kvmppc_save_tm_pr(vcpu, mfmsr());
preempt_enable();
@@ -295,6 +323,16 @@ void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu)
preempt_disable();
_kvmppc_restore_tm_pr(vcpu, vcpu->arch.save_msr_tm);
preempt_enable();
+
+   if (vcpu->arch.save_msr_tm & MSR_VSX)
+   kvmppc_load_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
+   else {
+   if 

[PATCH 16/26] KVM: PPC: Book3S PR: add transaction memory save/restore skeleton for PR KVM

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

The transaction memory checkpoint area save/restore behavior is
triggered when VCPU qemu process is switching out/into CPU. ie.
at kvmppc_core_vcpu_put_pr() and kvmppc_core_vcpu_load_pr().

MSR TM active state is determined by TS bits:
active: 10(transactional) or 01 (suspended)
inactive: 00 (non-transactional)
We don't "fake" TM functionality for guest. We "sync" guest virtual
MSR TM active state(10 or 01) with shadow MSR. That is to say,
we don't emulate a transactional guest with a TM inactive MSR.

TM SPR support(TFIAR/TFAR/TEXASR) has already been supported by
commit 9916d57e64a4 ("KVM: PPC: Book3S PR: Expose TM registers").
Math register support (FPR/VMX/VSX) will be done at subsequent
patch.

- TM save:
When kvmppc_save_tm_pr() is invoked, whether TM context need to
be saved can be determined by current host MSR state:
* TM active - save TM context
* TM inactive - no need to do so and only save TM SPRs.

- TM restore:
However when kvmppc_restore_tm_pr() is invoked, there is an
issue to determine whether TM restore should be performed.
The TM active host MSR val saved in kernel stack is not loaded yet.
We don't know whether there is a transaction to be restored from
current host MSR TM status at kvmppc_restore_tm_pr(). To solve this
issue, we save current MSR into vcpu->arch.save_msr_tm at
kvmppc_save_tm_pr(), and kvmppc_restore_tm_pr() check TS bits of
vcpu->arch.save_msr_tm to decide whether to do TM restore.

Signed-off-by: Simon Guo 
Suggested-by: Paul Mackerras 
---
 arch/powerpc/include/asm/kvm_book3s.h |  6 +
 arch/powerpc/include/asm/kvm_host.h   |  1 +
 arch/powerpc/kvm/book3s_pr.c  | 41 +++
 3 files changed, 48 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_book3s.h 
b/arch/powerpc/include/asm/kvm_book3s.h
index 9a66700..d8dbfa5 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -253,6 +253,12 @@ extern void kvmppc_copy_to_svcpu(struct 
kvmppc_book3s_shadow_vcpu *svcpu,
 struct kvm_vcpu *vcpu);
 extern void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
   struct kvmppc_book3s_shadow_vcpu *svcpu);
+
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu);
+void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu);
+#endif
+
 extern int kvm_irq_bypass;
 
 static inline struct kvmppc_vcpu_book3s *to_book3s(struct kvm_vcpu *vcpu)
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 3aa5b57..eb3b821 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -627,6 +627,7 @@ struct kvm_vcpu_arch {
struct thread_vr_state vr_tm;
u32 vrsave_tm; /* also USPRG0 */
 
+   u64 save_msr_tm; /* TS bits: whether TM restore is required */
 #endif
 
 #ifdef CONFIG_KVM_EXIT_TIMING
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 5224b3c..eef0928 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -43,6 +43,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "book3s.h"
 
@@ -114,6 +115,9 @@ static void kvmppc_core_vcpu_load_pr(struct kvm_vcpu *vcpu, 
int cpu)
 
if (kvmppc_is_split_real(vcpu))
kvmppc_fixup_split_real(vcpu);
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+   kvmppc_restore_tm_pr(vcpu);
+#endif
 }
 
 static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
@@ -131,6 +135,10 @@ static void kvmppc_core_vcpu_put_pr(struct kvm_vcpu *vcpu)
if (kvmppc_is_split_real(vcpu))
kvmppc_unfixup_split_real(vcpu);
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+   kvmppc_save_tm_pr(vcpu);
+#endif
+
kvmppc_giveup_ext(vcpu, MSR_FP | MSR_VEC | MSR_VSX);
kvmppc_giveup_fac(vcpu, FSCR_TAR_LG);
 
@@ -255,6 +263,39 @@ static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu 
*vcpu)
tm_disable();
 }
 
+void kvmppc_save_tm_pr(struct kvm_vcpu *vcpu)
+{
+   /*
+* When kvmppc_save_tm_pr() is invoked, whether TM context need to
+* be saved can be determined by current MSR TS active state.
+*
+* We save current MSR's TM TS bits into vcpu->arch.save_msr_tm.
+* So that kvmppc_restore_tm_pr() can decide to do TM restore or
+* not based on that.
+*/
+   vcpu->arch.save_msr_tm = mfmsr();
+
+   if (!(MSR_TM_ACTIVE(vcpu->arch.save_msr_tm))) {
+   kvmppc_save_tm_sprs(vcpu);
+   return;
+   }
+
+   preempt_disable();
+   _kvmppc_save_tm_pr(vcpu, mfmsr());
+   preempt_enable();
+}
+
+void kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu)
+{
+   if (!MSR_TM_ACTIVE(vcpu->arch.save_msr_tm)) {
+   kvmppc_restore_tm_sprs(vcpu);
+   return;
+   }
+
+   preempt_disable();
+   

[PATCH 15/26] KVM: PPC: Book3S PR: add kvmppc_save/restore_tm_sprs() APIs

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

This patch adds 2 new APIs kvmppc_save_tm_sprs()/kvmppc_restore_tm_sprs()
for the purpose of TEXASR/TFIAR/TFHAR save/restore.

Signed-off-by: Simon Guo 
Reviewed-by: Paul Mackerras 
---
 arch/powerpc/kvm/book3s_pr.c | 22 ++
 1 file changed, 22 insertions(+)

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index b2f7566..5224b3c 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -42,6 +42,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "book3s.h"
 
@@ -235,6 +236,27 @@ void kvmppc_copy_from_svcpu(struct kvm_vcpu *vcpu,
preempt_enable();
 }
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+static inline void kvmppc_save_tm_sprs(struct kvm_vcpu *vcpu)
+{
+   tm_enable();
+   vcpu->arch.tfhar = mfspr(SPRN_TFHAR);
+   vcpu->arch.texasr = mfspr(SPRN_TEXASR);
+   vcpu->arch.tfiar = mfspr(SPRN_TFIAR);
+   tm_disable();
+}
+
+static inline void kvmppc_restore_tm_sprs(struct kvm_vcpu *vcpu)
+{
+   tm_enable();
+   mtspr(SPRN_TFHAR, vcpu->arch.tfhar);
+   mtspr(SPRN_TEXASR, vcpu->arch.texasr);
+   mtspr(SPRN_TFIAR, vcpu->arch.tfiar);
+   tm_disable();
+}
+
+#endif
+
 static int kvmppc_core_check_requests_pr(struct kvm_vcpu *vcpu)
 {
int r = 1; /* Indicate we want to get back into the guest */
-- 
1.8.3.1



[PATCH 14/26] KVM: PPC: Book3S PR: export tm_enable()/tm_disable/tm_abort() APIs

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

This patch exports tm_enable()/tm_disable/tm_abort() APIs, which
will be used for PR KVM transaction memory logic.

Signed-off-by: Simon Guo 
Reviewed-by: Paul Mackerras 
---
 arch/powerpc/include/asm/asm-prototypes.h |  3 +++
 arch/powerpc/include/asm/tm.h |  2 --
 arch/powerpc/kernel/tm.S  | 12 
 arch/powerpc/mm/hash_utils_64.c   |  1 +
 4 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/asm-prototypes.h 
b/arch/powerpc/include/asm/asm-prototypes.h
index 9c3b290..2a0f54e 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -133,4 +133,7 @@ unsigned long __init prom_init(unsigned long r3, unsigned 
long r4,
 void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr);
 #endif
 
+void tm_enable(void);
+void tm_disable(void);
+void tm_abort(uint8_t cause);
 #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
diff --git a/arch/powerpc/include/asm/tm.h b/arch/powerpc/include/asm/tm.h
index b1658c9..e94f6db 100644
--- a/arch/powerpc/include/asm/tm.h
+++ b/arch/powerpc/include/asm/tm.h
@@ -10,12 +10,10 @@
 
 #ifndef __ASSEMBLY__
 
-extern void tm_enable(void);
 extern void tm_reclaim(struct thread_struct *thread,
   uint8_t cause);
 extern void tm_reclaim_current(uint8_t cause);
 extern void tm_recheckpoint(struct thread_struct *thread);
-extern void tm_abort(uint8_t cause);
 extern void tm_save_sprs(struct thread_struct *thread);
 extern void tm_restore_sprs(struct thread_struct *thread);
 
diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index b92ac8e..ff12f47 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -12,6 +12,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #ifdef CONFIG_VSX
 /* See fpu.S, this is borrowed from there */
@@ -55,6 +56,16 @@ _GLOBAL(tm_enable)
or  r4, r4, r3
mtmsrd  r4
 1: blr
+EXPORT_SYMBOL_GPL(tm_enable);
+
+_GLOBAL(tm_disable)
+   mfmsr   r4
+   li  r3, MSR_TM >> 32
+   sldir3, r3, 32
+   andcr4, r4, r3
+   mtmsrd  r4
+   blr
+EXPORT_SYMBOL_GPL(tm_disable);
 
 _GLOBAL(tm_save_sprs)
mfspr   r0, SPRN_TFHAR
@@ -78,6 +89,7 @@ _GLOBAL(tm_restore_sprs)
 _GLOBAL(tm_abort)
TABORT(R3)
blr
+EXPORT_SYMBOL_GPL(tm_abort);
 
 /* void tm_reclaim(struct thread_struct *thread,
  *uint8_t cause)
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 655a5a9..d354de6 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -63,6 +63,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
-- 
1.8.3.1



[PATCH 13/26] KVM: PPC: Book3S PR: adds new kvmppc_copyto_vcpu_tm/kvmppc_copyfrom_vcpu_tm API for PR KVM.

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

This patch adds 2 new APIs: kvmppc_copyto_vcpu_tm() and
kvmppc_copyfrom_vcpu_tm().  These 2 APIs will be used to copy from/to TM
data between VCPU_TM/VCPU area.

PR KVM will use these APIs for treclaim. or trchkpt. emulation.

Signed-off-by: Simon Guo 
Reviewed-by: Paul Mackerras 
---
 arch/powerpc/kvm/book3s_emulate.c | 39 +++
 1 file changed, 39 insertions(+)

diff --git a/arch/powerpc/kvm/book3s_emulate.c 
b/arch/powerpc/kvm/book3s_emulate.c
index 2eb457b..e096d01 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -87,6 +87,45 @@ static bool spr_allowed(struct kvm_vcpu *vcpu, enum 
priv_level level)
return true;
 }
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+void kvmppc_copyto_vcpu_tm(struct kvm_vcpu *vcpu)
+{
+   memcpy(>arch.gpr_tm[0], >arch.gpr[0],
+   sizeof(vcpu->arch.gpr_tm));
+   memcpy(>arch.fp_tm, >arch.fp,
+   sizeof(struct thread_fp_state));
+   memcpy(>arch.vr_tm, >arch.vr,
+   sizeof(struct thread_vr_state));
+   vcpu->arch.ppr_tm = vcpu->arch.ppr;
+   vcpu->arch.dscr_tm = vcpu->arch.dscr;
+   vcpu->arch.amr_tm = vcpu->arch.amr;
+   vcpu->arch.ctr_tm = vcpu->arch.ctr;
+   vcpu->arch.tar_tm = vcpu->arch.tar;
+   vcpu->arch.lr_tm = vcpu->arch.lr;
+   vcpu->arch.cr_tm = vcpu->arch.cr;
+   vcpu->arch.vrsave_tm = vcpu->arch.vrsave;
+}
+
+void kvmppc_copyfrom_vcpu_tm(struct kvm_vcpu *vcpu)
+{
+   memcpy(>arch.gpr[0], >arch.gpr_tm[0],
+   sizeof(vcpu->arch.gpr));
+   memcpy(>arch.fp, >arch.fp_tm,
+   sizeof(struct thread_fp_state));
+   memcpy(>arch.vr, >arch.vr_tm,
+   sizeof(struct thread_vr_state));
+   vcpu->arch.ppr = vcpu->arch.ppr_tm;
+   vcpu->arch.dscr = vcpu->arch.dscr_tm;
+   vcpu->arch.amr = vcpu->arch.amr_tm;
+   vcpu->arch.ctr = vcpu->arch.ctr_tm;
+   vcpu->arch.tar = vcpu->arch.tar_tm;
+   vcpu->arch.lr = vcpu->arch.lr_tm;
+   vcpu->arch.cr = vcpu->arch.cr_tm;
+   vcpu->arch.vrsave = vcpu->arch.vrsave_tm;
+}
+
+#endif
+
 int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct kvm_vcpu *vcpu,
  unsigned int inst, int *advance)
 {
-- 
1.8.3.1



[PATCH 12/26] powerpc: export symbol msr_check_and_set().

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

PR KVM will need to reuse msr_check_and_set().
This patch exports this API for reuse.

Signed-off-by: Simon Guo 
Reviewed-by: Paul Mackerras 
---
 arch/powerpc/kernel/process.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 72be0c3..8f430e6 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -152,6 +152,7 @@ unsigned long msr_check_and_set(unsigned long bits)
 
return newmsr;
 }
+EXPORT_SYMBOL_GPL(msr_check_and_set);
 
 void __msr_check_and_clear(unsigned long bits)
 {
-- 
1.8.3.1



[PATCH 11/26] KVM: PPC: Book3S PR: prevent TS bits change in kvmppc_interrupt_pr()

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

PR KVM host usually equipped with enabled TM in its host MSR value, and
with non-transactional TS value.

When a guest with TM active traps into PR KVM host, the rfid at the
tail of kvmppc_interrupt_pr() will try to switch TS bits from
S0 (Suspended & TM disabled) to N1 (Non-transactional & TM enabled).

That will leads to TM Bad Thing interrupt.

This patch manually sets target TS bits unchanged to avoid this
exception.

Signed-off-by: Simon Guo 
Reviewed-by: Paul Mackerras 
---
 arch/powerpc/kvm/book3s_segment.S | 13 +
 1 file changed, 13 insertions(+)

diff --git a/arch/powerpc/kvm/book3s_segment.S 
b/arch/powerpc/kvm/book3s_segment.S
index 2a2b96d..675e9a2 100644
--- a/arch/powerpc/kvm/book3s_segment.S
+++ b/arch/powerpc/kvm/book3s_segment.S
@@ -383,6 +383,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
 */
 
PPC_LL  r6, HSTATE_HOST_MSR(r13)
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+   /*
+* We don't want to change MSR[TS] bits via rfi here.
+* The actual TM handling logic will be in host with
+* recovered DR/IR bits after HSTATE_VMHANDLER.
+* And MSR_TM can be enabled in HOST_MSR so rfid may
+* not suppress this change and can lead to exception.
+* Manually set MSR to prevent TS state change here.
+*/
+   mfmsr   r7
+   rldicl  r7, r7, 64 - MSR_TS_S_LG, 62
+   rldimi  r6, r7, MSR_TS_S_LG, 63 - MSR_TS_T_LG
+#endif
PPC_LL  r8, HSTATE_VMHANDLER(r13)
 
 #ifdef CONFIG_PPC64
-- 
1.8.3.1



[PATCH 10/26] KVM: PPC: Book3S PR: set MSR HV bit accordingly for PPC970 and others.

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

Apple G5 machines (PPC970/FX/GX/MP) have supervisor mode disabled and
the MSR HV bit is forced to 1. We should follow this in the PR KVM guest.

This patch sets MSR HV=1 for G5 machines and HV=0 for others on the PR
KVM guest.

Signed-off-by: Simon Guo 
Suggested-by: Paul Mackerras 
---
 arch/powerpc/kvm/book3s_pr.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 7ec866a..b2f7566 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -320,6 +320,7 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
 {
ulong guest_msr = kvmppc_get_msr(vcpu);
ulong smsr = guest_msr;
+   u32 guest_pvr = vcpu->arch.pvr;
 
/* Guest MSR values */
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -334,7 +335,16 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
smsr |= (guest_msr & vcpu->arch.guest_owned_ext);
/* 64-bit Process MSR values */
 #ifdef CONFIG_PPC_BOOK3S_64
-   smsr |= MSR_ISF | MSR_HV;
+   smsr |= MSR_ISF;
+
+   /* for PPC970 chip, its HV bit is hard-wired to 1. For others,
+* we should clear HV bit.
+*/
+   if ((PVR_VER(guest_pvr) == PVR_970) ||
+   (PVR_VER(guest_pvr) == PVR_970FX) ||
+   (PVR_VER(guest_pvr) == PVR_970MP) ||
+   (PVR_VER(guest_pvr) == PVR_970GX))
+   smsr |= MSR_HV;
 #endif
vcpu->arch.shadow_msr = smsr;
 }
-- 
1.8.3.1



[PATCH 09/26] KVM: PPC: Book3S PR: implement RFID TM behavior to suppress change from S0 to N0

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

According to the ISA specification for RFID, in the MSR TM disabled and TS
suspended state (S0), if the target MSR is TM disabled and the TS state is
inactive (N0), rfid should suppress this update.

This patch make RFID emulation of PR KVM to be consistent with this.

Signed-off-by: Simon Guo 
Reviewed-by: Paul Mackerras 
---
 arch/powerpc/kvm/book3s_emulate.c | 21 +++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_emulate.c 
b/arch/powerpc/kvm/book3s_emulate.c
index 68d6898..2eb457b 100644
--- a/arch/powerpc/kvm/book3s_emulate.c
+++ b/arch/powerpc/kvm/book3s_emulate.c
@@ -117,11 +117,28 @@ int kvmppc_core_emulate_op_pr(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
case 19:
switch (get_xop(inst)) {
case OP_19_XOP_RFID:
-   case OP_19_XOP_RFI:
+   case OP_19_XOP_RFI: {
+   unsigned long srr1 = kvmppc_get_srr1(vcpu);
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+   unsigned long cur_msr = kvmppc_get_msr(vcpu);
+
+   /*
+* add rules to fit in ISA specification regarding TM
+* state transistion in TM disable/Suspended state,
+* and target TM state is TM inactive(00) state. (the
+* change should be suppressed).
+*/
+   if (((cur_msr & MSR_TM) == 0) &&
+   ((srr1 & MSR_TM) == 0) &&
+   MSR_TM_SUSPENDED(cur_msr) &&
+   !MSR_TM_ACTIVE(srr1))
+   srr1 |= MSR_TS_S;
+#endif
kvmppc_set_pc(vcpu, kvmppc_get_srr0(vcpu));
-   kvmppc_set_msr(vcpu, kvmppc_get_srr1(vcpu));
+   kvmppc_set_msr(vcpu, srr1);
*advance = 0;
break;
+   }
 
default:
emulated = EMULATE_FAIL;
-- 
1.8.3.1



[PATCH 08/26] KVM: PPC: Book3S PR: Sync TM bits to shadow msr for problem state guest

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

MSR TS bits can be modified with non-privileged instruction like
tbegin./tend.  That means guest can change MSR value "silently" without
notifying host.

It is necessary to sync the TM bits to host so that host can calculate
shadow msr correctly.

Note that a privileged guest will always fail transactions, so we only
take care of problem-state guests.

Signed-off-by: Simon Guo 
Reviewed-by: Paul Mackerras 
---
 arch/powerpc/kvm/book3s_pr.c | 25 +
 1 file changed, 25 insertions(+)

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index 4e9acdd..7ec866a 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -977,6 +977,9 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
 {
int r = RESUME_HOST;
int s;
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+   ulong old_msr = kvmppc_get_msr(vcpu);
+#endif
 
vcpu->stat.sum_exits++;
 
@@ -988,6 +991,28 @@ int kvmppc_handle_exit_pr(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
trace_kvm_exit(exit_nr, vcpu);
guest_exit();
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+   /*
+* Unlike other MSR bits, MSR[TS]bits can be changed at guest without
+* notifying host:
+*  modified by unprivileged instructions like "tbegin"/"tend"/
+* "tresume"/"tsuspend" in PR KVM guest.
+*
+* It is necessary to sync here to calculate a correct shadow_msr.
+*
+* privileged guest's tbegin will be failed at present. So we
+* only take care of problem state guest.
+*/
+   if (unlikely((old_msr & MSR_PR) &&
+   (vcpu->arch.shadow_srr1 & (MSR_TS_MASK)) !=
+   (old_msr & (MSR_TS_MASK {
+   old_msr &= ~(MSR_TS_MASK);
+   old_msr |= (vcpu->arch.shadow_srr1 & (MSR_TS_MASK));
+   kvmppc_set_msr_fast(vcpu, old_msr);
+   kvmppc_recalc_shadow_msr(vcpu);
+   }
+#endif
+
switch (exit_nr) {
case BOOK3S_INTERRUPT_INST_STORAGE:
{
-- 
1.8.3.1



[PATCH 07/26] KVM: PPC: Book3S PR: add TEXASR related macros

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

This patch adds some macros for CR0/TEXASR bits so that the PR KVM TM
logic (tbegin./treclaim./tabort.) can make use of them later.

Signed-off-by: Simon Guo 
---
 arch/powerpc/include/asm/reg.h  | 21 -
 arch/powerpc/platforms/powernv/copy-paste.h |  3 +--
 2 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index b779f3c..6c293bc 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -146,6 +146,12 @@
 #define MSR_64BIT  0
 #endif
 
+/* Condition Register related */
+#define CR0_SHIFT  28
+#define CR0_MASK   0xF
+#define CR0_TBEGIN_FAILURE (0x2 << 28) /* 0b0010 */
+
+
 /* Power Management - Processor Stop Status and Control Register Fields */
 #define PSSCR_RL_MASK  0x000F /* Requested Level */
 #define PSSCR_MTL_MASK 0x00F0 /* Maximum Transition Level */
@@ -237,8 +243,21 @@
 #define SPRN_TFIAR 0x81/* Transaction Failure Inst Addr   */
 #define SPRN_TEXASR0x82/* Transaction EXception & Summary */
 #define SPRN_TEXASRU   0x83/* ''  ''  ''Upper 32  */
-#define   TEXASR_FS__MASK(63-36) /* TEXASR Failure Summary */
+#define TEXASR_FC_LG   (63 - 7)/* Failure Code */
+#define TEXASR_HV_LG   (63 - 34)   /* Hypervisor state*/
+#define TEXASR_PR_LG   (63 - 35)   /* Privilege level */
+#define TEXASR_FS_LG   (63 - 36)   /* failure summary */
+#define TEXASR_EX_LG   (63 - 37)   /* TFIAR exact bit */
+#define TEXASR_ROT_LG  (63 - 38)   /* ROT bit */
+#define TEXASR_FC  (ASM_CONST(0xFF) << TEXASR_FC_LG)
+#define TEXASR_HV  __MASK(TEXASR_HV_LG)
+#define TEXASR_PR  __MASK(TEXASR_PR_LG)
+#define TEXASR_FS  __MASK(TEXASR_FS_LG)
+#define TEXASR_EX  __MASK(TEXASR_EX_LG)
+#define TEXASR_ROT __MASK(TEXASR_ROT_LG)
+
 #define SPRN_TFHAR 0x80/* Transaction Failure Handler Addr */
+
 #define SPRN_TIDR  144 /* Thread ID register */
 #define SPRN_CTRLF 0x088
 #define SPRN_CTRLT 0x098
diff --git a/arch/powerpc/platforms/powernv/copy-paste.h 
b/arch/powerpc/platforms/powernv/copy-paste.h
index c9a5036..3fa62de 100644
--- a/arch/powerpc/platforms/powernv/copy-paste.h
+++ b/arch/powerpc/platforms/powernv/copy-paste.h
@@ -7,9 +7,8 @@
  * 2 of the License, or (at your option) any later version.
  */
 #include 
+#include 
 
-#define CR0_SHIFT  28
-#define CR0_MASK   0xF
 /*
  * Copy/paste instructions:
  *
-- 
1.8.3.1



[PATCH 06/26] KVM: PPC: Book3S PR: PR KVM pass through MSR TM/TS bits to shadow_msr.

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

PowerPC TM functionality needs MSR TM/TS bits support in hardware level.
Guest TM functionality can not be emulated with "fake" MSR (msr in magic
page) TS bits.

This patch syncs TM/TS bits in shadow_msr with the MSR value in magic
page, so that the MSR TS value which guest sees is consistent with actual
MSR bits running in guest.

Signed-off-by: Simon Guo 
Reviewed-by: Paul Mackerras 
---
 arch/powerpc/kvm/book3s_pr.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index d0dc862..4e9acdd 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -322,7 +322,12 @@ static void kvmppc_recalc_shadow_msr(struct kvm_vcpu *vcpu)
ulong smsr = guest_msr;
 
/* Guest MSR values */
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+   smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE |
+   MSR_TM | MSR_TS_MASK;
+#else
smsr &= MSR_FE0 | MSR_FE1 | MSR_SF | MSR_SE | MSR_BE | MSR_LE;
+#endif
/* Process MSR values */
smsr |= MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_PR | MSR_EE;
/* External providers the guest reserved */
-- 
1.8.3.1



[PATCH 05/26] KVM: PPC: Book3S PR: In PR KVM suspends Transactional state when inject an interrupt.

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

This patch simulates interrupt behavior per Power ISA while injecting
interrupt in PR KVM:
- When interrupt happens, transactional state should be suspended.

kvmppc_mmu_book3s_64_reset_msr() will be invoked when injecting an
interrupt. This patch performs this ISA logic in
kvmppc_mmu_book3s_64_reset_msr().

Signed-off-by: Simon Guo 
Reviewed-by: Paul Mackerras 
---
 arch/powerpc/kvm/book3s_64_mmu.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu.c b/arch/powerpc/kvm/book3s_64_mmu.c
index 29ebe2f..6048dbd 100644
--- a/arch/powerpc/kvm/book3s_64_mmu.c
+++ b/arch/powerpc/kvm/book3s_64_mmu.c
@@ -38,7 +38,16 @@
 
 static void kvmppc_mmu_book3s_64_reset_msr(struct kvm_vcpu *vcpu)
 {
-   kvmppc_set_msr(vcpu, vcpu->arch.intr_msr);
+   unsigned long msr = vcpu->arch.intr_msr;
+   unsigned long cur_msr = kvmppc_get_msr(vcpu);
+
+   /* If transactional, change to suspend mode on IRQ delivery */
+   if (MSR_TM_TRANSACTIONAL(cur_msr))
+   msr |= MSR_TS_S;
+   else
+   msr |= cur_msr & MSR_TS_MASK;
+
+   kvmppc_set_msr(vcpu, msr);
 }
 
 static struct kvmppc_slb *kvmppc_mmu_book3s_64_find_slbe(
-- 
1.8.3.1



[PATCH 04/26] KVM: PPC: Book3S PR: add C function wrapper for _kvmppc_save/restore_tm()

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

Currently _kvmppc_save/restore_tm() APIs can only be invoked from
assembly function. This patch adds C function wrappers for them so
that they can be safely called from C function.

Signed-off-by: Simon Guo 
---
 arch/powerpc/include/asm/asm-prototypes.h |   7 ++
 arch/powerpc/kvm/book3s_hv_rmhandlers.S   |   8 +--
 arch/powerpc/kvm/tm.S | 107 +-
 3 files changed, 116 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/asm-prototypes.h 
b/arch/powerpc/include/asm/asm-prototypes.h
index 7330150..9c3b290 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -126,4 +126,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned 
long r4,
 void _mcount(void);
 unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip);
 
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+/* Transaction memory related */
+struct kvm_vcpu;
+void _kvmppc_restore_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr);
+void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr);
+#endif
+
 #endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 613fd27..4c8d5b1 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -810,7 +810,7 @@ BEGIN_FTR_SECTION
 */
mr  r3, r4
ld  r4, VCPU_MSR(r3)
-   bl  kvmppc_restore_tm
+   bl  __kvmppc_restore_tm
ld  r4, HSTATE_KVM_VCPU(r13)
 END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
@@ -1685,7 +1685,7 @@ BEGIN_FTR_SECTION
 */
mr  r3, r9
ld  r4, VCPU_MSR(r3)
-   bl  kvmppc_save_tm
+   bl  __kvmppc_save_tm
ld  r9, HSTATE_KVM_VCPU(r13)
 END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
@@ -2551,7 +2551,7 @@ BEGIN_FTR_SECTION
 */
ld  r3, HSTATE_KVM_VCPU(r13)
ld  r4, VCPU_MSR(r3)
-   bl  kvmppc_save_tm
+   bl  __kvmppc_save_tm
 END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
 
@@ -2665,7 +2665,7 @@ BEGIN_FTR_SECTION
 */
mr  r3, r4
ld  r4, VCPU_MSR(r3)
-   bl  kvmppc_restore_tm
+   bl  __kvmppc_restore_tm
ld  r4, HSTATE_KVM_VCPU(r13)
 END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S
index 2d6fe5b..5752bae 100644
--- a/arch/powerpc/kvm/tm.S
+++ b/arch/powerpc/kvm/tm.S
@@ -35,7 +35,7 @@
  * This can modify all checkpointed registers, but
  * restores r1, r2 before exit.
  */
-_GLOBAL(kvmppc_save_tm)
+_GLOBAL(__kvmppc_save_tm)
mflrr0
std r0, PPC_LR_STKOFF(r1)
 
@@ -149,6 +149,58 @@ _GLOBAL(kvmppc_save_tm)
blr
 
 /*
+ * _kvmppc_save_tm() is a wrapper around __kvmppc_save_tm(), so that it can
+ * be invoked from C function by PR KVM only.
+ */
+_GLOBAL(_kvmppc_save_tm_pr)
+   mflrr5
+   std r5, PPC_LR_STKOFF(r1)
+   stdur1, -SWITCH_FRAME_SIZE(r1)
+   SAVE_NVGPRS(r1)
+
+   /* save MSR since TM/math bits might be impacted
+* by __kvmppc_save_tm().
+*/
+   mfmsr   r5
+   SAVE_GPR(5, r1)
+
+   /* also save DSCR/CR so that it can be recovered later */
+   mfspr   r6, SPRN_DSCR
+   SAVE_GPR(6, r1)
+
+   mfcrr7
+   stw r7, _CCR(r1)
+
+   /* allocate stack frame for __kvmppc_save_tm since
+* it will save LR into its stackframe and we don't
+* want to corrupt _kvmppc_save_tm_pr's.
+*/
+   stdur1, -PPC_MIN_STKFRM(r1)
+   bl  __kvmppc_save_tm
+   addir1, r1, PPC_MIN_STKFRM
+
+   ld  r7, _CCR(r1)
+   mtcrr7
+
+   REST_GPR(6, r1)
+   mtspr   SPRN_DSCR, r6
+
+   /* need preserve current MSR's MSR_TS bits */
+   REST_GPR(5, r1)
+   mfmsr   r6
+   rldicl  r6, r6, 64 - MSR_TS_S_LG, 62
+   rldimi  r5, r6, MSR_TS_S_LG, 63 - MSR_TS_T_LG
+   mtmsrd  r5
+
+   REST_NVGPRS(r1)
+   addir1, r1, SWITCH_FRAME_SIZE
+   ld  r5, PPC_LR_STKOFF(r1)
+   mtlrr5
+   blr
+
+EXPORT_SYMBOL_GPL(_kvmppc_save_tm_pr);
+
+/*
  * Restore transactional state and TM-related registers.
  * Called with:
  *  - r3 pointing to the vcpu struct.
@@ -158,7 +210,7 @@ _GLOBAL(kvmppc_save_tm)
  * This potentially modifies all checkpointed registers.
  * It restores r1, r2 from the PACA.
  */
-_GLOBAL(kvmppc_restore_tm)
+_GLOBAL(__kvmppc_restore_tm)
mflrr0
std r0, PPC_LR_STKOFF(r1)
 
@@ -186,6 +238,7 @@ _GLOBAL(kvmppc_restore_tm)
rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
beqlr   /* TM not active in guest */
std r1, HSTATE_SCRATCH2(r13)
+   std r3, HSTATE_SCRATCH1(r13)
 
/* Make sure the failure summary is set, otherwise we'll program check
 * when we trechkpt.  It's possible 

[PATCH 03/26] KVM: PPC: Book3S PR: turn on FP/VSX/VMX MSR bits in kvmppc_save_tm()

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

kvmppc_save_tm() invokes  store_fp_state/store_vr_state(). So it is
mandatory to turn on FP/VSX/VMX MSR bits for its execution, just
like what kvmppc_restore_tm() did.

Previously HV KVM has turned the bits on outside of function
kvmppc_save_tm().  Now we include this bit change in kvmppc_save_tm()
so that the logic is more clean. And PR KVM can reuse it later.

Signed-off-by: Simon Guo 
Reviewed-by: Paul Mackerras 
---
 arch/powerpc/kvm/tm.S | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S
index e779b15..2d6fe5b 100644
--- a/arch/powerpc/kvm/tm.S
+++ b/arch/powerpc/kvm/tm.S
@@ -43,6 +43,8 @@ _GLOBAL(kvmppc_save_tm)
mfmsr   r8
li  r0, 1
rldimi  r8, r0, MSR_TM_LG, 63-MSR_TM_LG
+   ori r8, r8, MSR_FP
+   orisr8, r8, (MSR_VEC | MSR_VSX)@h
mtmsrd  r8
 
rldicl. r4, r4, 64 - MSR_TS_S_LG, 62
-- 
1.8.3.1



[PATCH 02/26] KVM: PPC: Book3S PR: add new parameter (guest MSR) for kvmppc_save_tm()/kvmppc_restore_tm()

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

HV KVM and PR KVM need different MSR source to indicate whether
treclaim. or trecheckpoint. is necessary.

This patch add new parameter (guest MSR) for these kvmppc_save_tm/
kvmppc_restore_tm() APIs:
- For HV KVM, it is VCPU_MSR
- For PR KVM, it is current host MSR or VCPU_SHADOW_SRR1

This enhancement enables these 2 APIs to be reused by PR KVM later.
And the patch keeps HV KVM logic unchanged.

This patch also reworks kvmppc_save_tm()/kvmppc_restore_tm() to
have a clean ABI: r3 for vcpu and r4 for guest_msr.

Signed-off-by: Simon Guo 
---
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 12 ++-
 arch/powerpc/kvm/tm.S   | 61 ++---
 2 files changed, 45 insertions(+), 28 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index a5c8ecd..613fd27 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -808,7 +808,10 @@ BEGIN_FTR_SECTION
/*
 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
 */
+   mr  r3, r4
+   ld  r4, VCPU_MSR(r3)
bl  kvmppc_restore_tm
+   ld  r4, HSTATE_KVM_VCPU(r13)
 END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
 
@@ -1680,7 +1683,10 @@ BEGIN_FTR_SECTION
/*
 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
 */
+   mr  r3, r9
+   ld  r4, VCPU_MSR(r3)
bl  kvmppc_save_tm
+   ld  r9, HSTATE_KVM_VCPU(r13)
 END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
 
@@ -2543,7 +2549,8 @@ BEGIN_FTR_SECTION
/*
 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
 */
-   ld  r9, HSTATE_KVM_VCPU(r13)
+   ld  r3, HSTATE_KVM_VCPU(r13)
+   ld  r4, VCPU_MSR(r3)
bl  kvmppc_save_tm
 END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
@@ -2656,7 +2663,10 @@ BEGIN_FTR_SECTION
/*
 * NOTE THAT THIS TRASHES ALL NON-VOLATILE REGISTERS INCLUDING CR
 */
+   mr  r3, r4
+   ld  r4, VCPU_MSR(r3)
bl  kvmppc_restore_tm
+   ld  r4, HSTATE_KVM_VCPU(r13)
 END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
 
diff --git a/arch/powerpc/kvm/tm.S b/arch/powerpc/kvm/tm.S
index 072d35e..e779b15 100644
--- a/arch/powerpc/kvm/tm.S
+++ b/arch/powerpc/kvm/tm.S
@@ -28,9 +28,12 @@
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 /*
  * Save transactional state and TM-related registers.
- * Called with r9 pointing to the vcpu struct.
+ * Called with:
+ * - r3 pointing to the vcpu struct
+ * - r4 points to the MSR with current TS bits:
+ * (For HV KVM, it is VCPU_MSR ; For PR KVM, it is host MSR).
  * This can modify all checkpointed registers, but
- * restores r1, r2 and r9 (vcpu pointer) before exit.
+ * restores r1, r2 before exit.
  */
 _GLOBAL(kvmppc_save_tm)
mflrr0
@@ -42,11 +45,11 @@ _GLOBAL(kvmppc_save_tm)
rldimi  r8, r0, MSR_TM_LG, 63-MSR_TM_LG
mtmsrd  r8
 
-   ld  r5, VCPU_MSR(r9)
-   rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
+   rldicl. r4, r4, 64 - MSR_TS_S_LG, 62
beq 1f  /* TM not active in guest. */
 
-   std r1, HSTATE_HOST_R1(r13)
+   std r1, HSTATE_SCRATCH2(r13)
+   std r3, HSTATE_SCRATCH1(r13)
li  r3, TM_CAUSE_KVM_RESCHED
 
/* Clear the MSR RI since r1, r13 are all going to be foobar. */
@@ -60,7 +63,7 @@ _GLOBAL(kvmppc_save_tm)
SET_SCRATCH0(r13)
GET_PACA(r13)
std r9, PACATMSCRATCH(r13)
-   ld  r9, HSTATE_KVM_VCPU(r13)
+   ld  r9, HSTATE_SCRATCH1(r13)
 
/* Get a few more GPRs free. */
std r29, VCPU_GPRS_TM(29)(r9)
@@ -92,7 +95,7 @@ _GLOBAL(kvmppc_save_tm)
std r4, VCPU_GPRS_TM(9)(r9)
 
/* Reload stack pointer and TOC. */
-   ld  r1, HSTATE_HOST_R1(r13)
+   ld  r1, HSTATE_SCRATCH2(r13)
ld  r2, PACATOC(r13)
 
/* Set MSR RI now we have r1 and r13 back. */
@@ -145,9 +148,13 @@ _GLOBAL(kvmppc_save_tm)
 
 /*
  * Restore transactional state and TM-related registers.
- * Called with r4 pointing to the vcpu struct.
+ * Called with:
+ *  - r3 pointing to the vcpu struct.
+ *  - r4 is the guest MSR with desired TS bits:
+ * For HV KVM, it is VCPU_MSR
+ * For PR KVM, it is provided by caller
  * This potentially modifies all checkpointed registers.
- * It restores r1, r2, r4 from the PACA.
+ * It restores r1, r2 from the PACA.
  */
 _GLOBAL(kvmppc_restore_tm)
mflrr0
@@ -166,17 +173,17 @@ _GLOBAL(kvmppc_restore_tm)
 * The user may change these outside of a transaction, so they must
 * always be context switched.
 */
-   ld  r5, VCPU_TFHAR(r4)
-   ld  r6, VCPU_TFIAR(r4)
-   ld  r7, VCPU_TEXASR(r4)
+   ld  r5, VCPU_TFHAR(r3)
+   ld  r6, VCPU_TFIAR(r3)
+   ld  r7, VCPU_TEXASR(r3)
  

[PATCH 01/26] KVM: PPC: Book3S PR: Move kvmppc_save_tm/kvmppc_restore_tm to separate file

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

It is a simple patch just for moving kvmppc_save_tm/kvmppc_restore_tm()
functionalities to tm.S. There is no logic change. The reconstruct of
those APIs will be done in later patches to improve readability.

It is for preparation of reusing those APIs on both HV/PR PPC KVM.

Signed-off-by: Simon Guo 
Reviewed-by: Paul Mackerras 
---
 arch/powerpc/kvm/Makefile   |   3 +
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 239 
 arch/powerpc/kvm/tm.S   | 267 
 3 files changed, 270 insertions(+), 239 deletions(-)
 create mode 100644 arch/powerpc/kvm/tm.S

diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index 85ba80d..3886f1b 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -63,6 +63,9 @@ kvm-pr-y := \
book3s_64_mmu.o \
book3s_32_mmu.o
 
+kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
+   tm.o
+
 ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
 kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HANDLER) += \
book3s_rmhandlers.o
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S 
b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 2659844..a5c8ecd 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -39,8 +39,6 @@ BEGIN_FTR_SECTION;\
extsw   reg, reg;   \
 END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
 
-#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
-
 /* Values in HSTATE_NAPPING(r13) */
 #define NAPPING_CEDE   1
 #define NAPPING_NOVCPU 2
@@ -2951,243 +2949,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
mr  r4,r31
blr
 
-#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
-/*
- * Save transactional state and TM-related registers.
- * Called with r9 pointing to the vcpu struct.
- * This can modify all checkpointed registers, but
- * restores r1, r2 and r9 (vcpu pointer) before exit.
- */
-kvmppc_save_tm:
-   mflrr0
-   std r0, PPC_LR_STKOFF(r1)
-
-   /* Turn on TM. */
-   mfmsr   r8
-   li  r0, 1
-   rldimi  r8, r0, MSR_TM_LG, 63-MSR_TM_LG
-   mtmsrd  r8
-
-   ld  r5, VCPU_MSR(r9)
-   rldicl. r5, r5, 64 - MSR_TS_S_LG, 62
-   beq 1f  /* TM not active in guest. */
-
-   std r1, HSTATE_HOST_R1(r13)
-   li  r3, TM_CAUSE_KVM_RESCHED
-
-   /* Clear the MSR RI since r1, r13 are all going to be foobar. */
-   li  r5, 0
-   mtmsrd  r5, 1
-
-   /* All GPRs are volatile at this point. */
-   TRECLAIM(R3)
-
-   /* Temporarily store r13 and r9 so we have some regs to play with */
-   SET_SCRATCH0(r13)
-   GET_PACA(r13)
-   std r9, PACATMSCRATCH(r13)
-   ld  r9, HSTATE_KVM_VCPU(r13)
-
-   /* Get a few more GPRs free. */
-   std r29, VCPU_GPRS_TM(29)(r9)
-   std r30, VCPU_GPRS_TM(30)(r9)
-   std r31, VCPU_GPRS_TM(31)(r9)
-
-   /* Save away PPR and DSCR soon so don't run with user values. */
-   mfspr   r31, SPRN_PPR
-   HMT_MEDIUM
-   mfspr   r30, SPRN_DSCR
-   ld  r29, HSTATE_DSCR(r13)
-   mtspr   SPRN_DSCR, r29
-
-   /* Save all but r9, r13 & r29-r31 */
-   reg = 0
-   .rept   29
-   .if (reg != 9) && (reg != 13)
-   std reg, VCPU_GPRS_TM(reg)(r9)
-   .endif
-   reg = reg + 1
-   .endr
-   /* ... now save r13 */
-   GET_SCRATCH0(r4)
-   std r4, VCPU_GPRS_TM(13)(r9)
-   /* ... and save r9 */
-   ld  r4, PACATMSCRATCH(r13)
-   std r4, VCPU_GPRS_TM(9)(r9)
-
-   /* Reload stack pointer and TOC. */
-   ld  r1, HSTATE_HOST_R1(r13)
-   ld  r2, PACATOC(r13)
-
-   /* Set MSR RI now we have r1 and r13 back. */
-   li  r5, MSR_RI
-   mtmsrd  r5, 1
-
-   /* Save away checkpinted SPRs. */
-   std r31, VCPU_PPR_TM(r9)
-   std r30, VCPU_DSCR_TM(r9)
-   mflrr5
-   mfcrr6
-   mfctr   r7
-   mfspr   r8, SPRN_AMR
-   mfspr   r10, SPRN_TAR
-   mfxer   r11
-   std r5, VCPU_LR_TM(r9)
-   stw r6, VCPU_CR_TM(r9)
-   std r7, VCPU_CTR_TM(r9)
-   std r8, VCPU_AMR_TM(r9)
-   std r10, VCPU_TAR_TM(r9)
-   std r11, VCPU_XER_TM(r9)
-
-   /* Restore r12 as trap number. */
-   lwz r12, VCPU_TRAP(r9)
-
-   /* Save FP/VSX. */
-   addir3, r9, VCPU_FPRS_TM
-   bl  store_fp_state
-   addir3, r9, VCPU_VRS_TM
-   bl  store_vr_state
-   mfspr   r6, SPRN_VRSAVE
-   stw r6, VCPU_VRSAVE_TM(r9)
-1:
-   /*
-* We need to save these SPRs after the treclaim so that the software
-* error code is recorded correctly in the TEXASR.  Also the user may
-* change these outside of a transaction, so they must always be
-* context switched.
-

[PATCH 00/26] KVM: PPC: Book3S PR: Transaction memory support on PR KVM

2018-01-11 Thread wei . guo . simon
From: Simon Guo 

In current days, many OS distributions have utilized transaction
memory functionality. In PowerPC, HV KVM supports TM. But PR KVM
does not.

The drive for the transaction memory support of PR KVM is the
openstack Continuous Integration testing - They runs a HV(hypervisor)
KVM(as level 1) and then run PR KVM(as level 2) on top of that.

This patch set add transaction memory support on PR KVM.

Test cases performed:
linux/tools/testing/selftests/powerpc/tm/tm-syscall
linux/tools/testing/selftests/powerpc/tm/tm-fork
linux/tools/testing/selftests/powerpc/tm/tm-vmx-unavail
linux/tools/testing/selftests/powerpc/tm/tm-tmspr
linux/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv
linux/tools/testing/selftests/powerpc/math/vsx_preempt
linux/tools/testing/selftests/powerpc/math/fpu_signal
linux/tools/testing/selftests/powerpc/math/vmx_preempt
linux/tools/testing/selftests/powerpc/math/fpu_syscall
linux/tools/testing/selftests/powerpc/math/vmx_syscall
linux/tools/testing/selftests/powerpc/math/fpu_preempt
linux/tools/testing/selftests/powerpc/math/vmx_signal
linux/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr
linux/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr
linux/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx
linux/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr
linux/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx
https://github.com/justdoitqd/publicFiles/blob/master/test_tbegin_pr.c
https://github.com/justdoitqd/publicFiles/blob/master/test_tabort.c
https://github.com/justdoitqd/publicFiles/blob/master/test_kvm_htm_cap.c

Simon Guo (25):
  KVM: PPC: Book3S PR: Move kvmppc_save_tm/kvmppc_restore_tm to separate
file
  KVM: PPC: Book3S PR: add new parameter (guest MSR) for
kvmppc_save_tm()/kvmppc_restore_tm()
  KVM: PPC: Book3S PR: turn on FP/VSX/VMX MSR bits in kvmppc_save_tm()
  KVM: PPC: Book3S PR: add C function wrapper for
_kvmppc_save/restore_tm()
  KVM: PPC: Book3S PR: In PR KVM suspends Transactional state when
inject an interrupt.
  KVM: PPC: Book3S PR: PR KVM pass through MSR TM/TS bits to shadow_msr.
  KVM: PPC: Book3S PR: add TEXASR related macros
  KVM: PPC: Book3S PR: Sync TM bits to shadow msr for problem state
guest
  KVM: PPC: Book3S PR: implement RFID TM behavior to suppress change
from S0 to N0
  KVM: PPC: Book3S PR: set MSR HV bit accordingly for PPC970 and others.
  KVM: PPC: Book3S PR: prevent TS bits change in kvmppc_interrupt_pr()
  powerpc: export symbol msr_check_and_set().
  KVM: PPC: Book3S PR: adds new
kvmppc_copyto_vcpu_tm/kvmppc_copyfrom_vcpu_tm API for PR KVM.
  KVM: PPC: Book3S PR: export tm_enable()/tm_disable/tm_abort() APIs
  KVM: PPC: Book3S PR: add kvmppc_save/restore_tm_sprs() APIs
  KVM: PPC: Book3S PR: add transaction memory save/restore skeleton for
PR KVM
  KVM: PPC: Book3S PR: add math support for PR KVM HTM
  KVM: PPC: Book3S PR: make mtspr/mfspr emulation behavior based on
active TM SPRs
  KVM: PPC: Book3S PR: always fail transaction in guest privilege state
  KVM: PPC: Book3S PR: enable NV reg restore for reading TM SPR at guest
privilege state
  KVM: PPC: Book3S PR: adds emulation for treclaim.
  KVM: PPC: Book3S PR: add emulation for trechkpt in PR KVM.
  KVM: PPC: Book3S PR: add emulation for tabort. for privilege guest
  KVM: PPC: Book3S PR: add guard code to prevent returning to guest with
PR=0 and Transactional state
  KVM: PPC: Book3S PR: enable HTM for PR KVM for KVM_CHECK_EXTENSION
ioctl

 arch/powerpc/include/asm/asm-prototypes.h   |  10 +
 arch/powerpc/include/asm/kvm_book3s.h   |   8 +
 arch/powerpc/include/asm/kvm_host.h |   3 +
 arch/powerpc/include/asm/reg.h  |  25 +-
 arch/powerpc/include/asm/tm.h   |   2 -
 arch/powerpc/include/uapi/asm/tm.h  |   2 +-
 arch/powerpc/kernel/process.c   |   1 +
 arch/powerpc/kernel/tm.S|  12 +
 arch/powerpc/kvm/Makefile   |   3 +
 arch/powerpc/kvm/book3s.h   |   1 +
 arch/powerpc/kvm/book3s_64_mmu.c|  11 +-
 arch/powerpc/kvm/book3s_emulate.c   | 279 +++-
 arch/powerpc/kvm/book3s_hv_rmhandlers.S | 259 ++-
 arch/powerpc/kvm/book3s_pr.c| 256 +--
 arch/powerpc/kvm/book3s_segment.S   |  13 +
 arch/powerpc/kvm/powerpc.c  |   3 +-
 arch/powerpc/kvm/tm.S   | 379 
 arch/powerpc/mm/hash_utils_64.c |   1 +
 arch/powerpc/platforms/powernv/copy-paste.h |   3 +-
 19 files changed, 982 insertions(+), 289 deletions(-)
 create mode 100644 arch/powerpc/kvm/tm.S

-- 
1.8.3.1

*** BLURB HERE ***

Simon Guo (26):
  KVM: PPC: Book3S PR: Move kvmppc_save_tm/kvmppc_restore_tm to separate
file
  KVM: PPC: Book3S PR: add new parameter (guest MSR) for
kvmppc_save_tm()/kvmppc_restore_tm()
  KVM: PPC: Book3S PR: turn on FP/VSX/VMX MSR 

Re: [PATCH] KVM: PPC: Use seq_puts() in kvmppc_exit_timing_show()

2018-01-11 Thread Paul Mackerras
On Sun, Jan 07, 2018 at 10:18:08AM +0100, SF Markus Elfring wrote:
> From: Markus Elfring 
> Date: Sun, 7 Jan 2018 10:07:36 +0100
> 
> A headline should be quickly put into a sequence. Thus use the
> function "seq_puts" instead of "seq_printf" for this purpose.
> 
> This issue was detected by using the Coccinelle software.
> 
> Signed-off-by: Markus Elfring 

Thanks, applied to my kvm-ppc-next branch.

Paul.


Re: [PATCH] KVM: PPC: Book3S HV: Remove vcpu->arch.dec usage

2018-01-11 Thread Paul Mackerras
On Tue, Dec 19, 2017 at 03:56:24PM +0100, Alexander Graf wrote:
> On Book3S in HV mode, we don't use the vcpu->arch.dec field at all.
> Instead, all logic is built around vcpu->arch.dec_expires.
> 
> So let's remove the one remaining piece of code that was setting it.
> 
> Signed-off-by: Alexander Graf 

Thanks, applied to my kvm-ppc-next branch.

> Looking through the DEC logic, I fail to see any code that allows
> save or restore of DEC. Do we maybe miss out on that register for
> (live) migration?

Yes, it looks like we do.  I'm amazed no-one has noticed before.  I'll
fix it.

Paul.


Re: [RFC PATCH kernel] KVM: PPC: Book3S PR: Fix WIMG handling under pHyp

2018-01-11 Thread Paul Mackerras
On Wed, Nov 22, 2017 at 02:42:21PM +1100, Alexey Kardashevskiy wrote:
> 96df226 "KVM: PPC: Book3S PR: Preserve storage control bits" added WIMG
> bits preserving but it missed 2 special cases:
> - a magic page in kvmppc_mmu_book3s_64_xlate() and
> - guest real mode in kvmppc_handle_pagefault().
> 
> For these ptes WIMG were 0 and pHyp failed on these causing a guest to
> stop in the very beginning at NIP=0x100 (due to bd9166ffe
> "KVM: PPC: Book3S PR: Exit KVM on failed mapping").
> 
> This initializes WIMG to non-zero value HPTE_R_M. The value is chosen
> as (0x192 & HPTE_R_WIMG); 0x192 is a magic value from
> kvmppc_mmu_map_page().
> 
> Fixes: 96df226 "KVM: PPC: Book3S PR: Preserve storage control bits"
> Signed-off-by: Alexey Kardashevskiy 

Thanks, applied to my kvm-ppc-fixes branch, with an updated commit
message, fixes: tag, and cc:sta...@vger.kernel.org.

Paul.


[PATCH V7] cxl: Add support for ASB_Notify on POWER9

2018-01-11 Thread Christophe Lombard
The POWER9 core supports a new feature: ASB_Notify which requires the
support of the Special Purpose Register: TIDR.

The ASB_Notify command, generated by the AFU, will attempt to
wake-up the host thread identified by the particular LPID:PID:TID.

This patch assigns a unique TIDR (thread id) for the current thread which
will be used in the process element entry.

Signed-off-by: Christophe Lombard 
Reviewed-by: Philippe Bergheaud 

---
Changelog[v7]
 - Rebased to latest upstream.
 - Added boolean: "need to allocate a TIDR"
 - Released the mutex and mark the context as STARTED in case of error.

Changelog[v6]
 - Rebased to latest upstream.
 - Updated the ioctl interface.
 - Removed the updated ptrace.
 - Assigned a unique TIDR for the current thread at a lower level.

Changelog[v5]
 - Rebased to latest upstream.
 - Updated the ioctl interface.
 - Returned the tid in the ioctl structure.

Changelog[v4]
 - Rebased to latest upstream.
 - Updated the ioctl interface.
 - Removed the field tid in the context structure.

Changelog[v3]
 - Rebased to latest upstream.
 - Updated attr->tid field in cxllib_get_PE_attributes().

Changelog[v2]
 - Rebased to latest upstream.
 - Updated the ioctl interface.
 - Added a checking to allow updating the TIDR if a P9 chip is present.
---
 arch/powerpc/kernel/process.c |  1 +
 drivers/misc/cxl/context.c|  2 ++
 drivers/misc/cxl/cxl.h|  3 +++
 drivers/misc/cxl/cxllib.c |  3 ++-
 drivers/misc/cxl/file.c   | 15 +--
 drivers/misc/cxl/native.c | 13 -
 include/uapi/misc/cxl.h   | 10 ++
 7 files changed, 39 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 72be0c3..1dc39dd 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1589,6 +1589,7 @@ int set_thread_tidr(struct task_struct *t)
 
return 0;
 }
+EXPORT_SYMBOL_GPL(set_thread_tidr);
 
 #endif /* CONFIG_PPC64 */
 
diff --git a/drivers/misc/cxl/context.c b/drivers/misc/cxl/context.c
index 12a41b2..7ff315a 100644
--- a/drivers/misc/cxl/context.c
+++ b/drivers/misc/cxl/context.c
@@ -45,6 +45,8 @@ int cxl_context_init(struct cxl_context *ctx, struct cxl_afu 
*afu, bool master)
ctx->pid = NULL; /* Set in start work ioctl */
	mutex_init(&ctx->mapping_lock);
ctx->mapping = NULL;
+   ctx->tidr = 0;
+   ctx->assign_tidr = false;
 
if (cxl_is_power8()) {
		spin_lock_init(&ctx->sste_lock);
diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h
index e46a406..53149fb 100644
--- a/drivers/misc/cxl/cxl.h
+++ b/drivers/misc/cxl/cxl.h
@@ -630,6 +630,9 @@ struct cxl_context {
struct list_head extra_irq_contexts;
 
struct mm_struct *mm;
+
+   u16 tidr;
+   bool assign_tidr;
 };
 
 struct cxl_irq_info;
diff --git a/drivers/misc/cxl/cxllib.c b/drivers/misc/cxl/cxllib.c
index dc9bc18..30ccba4 100644
--- a/drivers/misc/cxl/cxllib.c
+++ b/drivers/misc/cxl/cxllib.c
@@ -199,10 +199,11 @@ int cxllib_get_PE_attributes(struct task_struct *task,
 */
attr->pid = mm->context.id;
mmput(mm);
+   attr->tid = task->thread.tidr;
} else {
attr->pid = 0;
+   attr->tid = 0;
}
-   attr->tid = 0;
return 0;
 }
 EXPORT_SYMBOL_GPL(cxllib_get_PE_attributes);
diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c
index 76c0b0c..93fd381 100644
--- a/drivers/misc/cxl/file.c
+++ b/drivers/misc/cxl/file.c
@@ -173,7 +173,7 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
 * flags are set it's invalid
 */
if (work.reserved1 || work.reserved2 || work.reserved3 ||
-   work.reserved4 || work.reserved5 || work.reserved6 ||
+   work.reserved4 || work.reserved5 ||
(work.flags & ~CXL_START_WORK_ALL)) {
rc = -EINVAL;
goto out;
@@ -186,12 +186,16 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
rc =  -EINVAL;
goto out;
}
+
if ((rc = afu_register_irqs(ctx, work.num_interrupts)))
goto out;
 
if (work.flags & CXL_START_WORK_AMR)
amr = work.amr & mfspr(SPRN_UAMOR);
 
+   if (work.flags & CXL_START_WORK_TID)
+   ctx->assign_tidr = true;
+
ctx->mmio_err_ff = !!(work.flags & CXL_START_WORK_ERR_FF);
 
/*
@@ -263,8 +267,15 @@ static long afu_ioctl_start_work(struct cxl_context *ctx,
goto out;
}
 
-   ctx->status = STARTED;
rc = 0;
+   if (work.flags & CXL_START_WORK_TID) {
+   work.tid = ctx->tidr;
		if (copy_to_user(uwork, &work, sizeof(work)))
+   rc = -EFAULT;
+   }
+
+   ctx->status = STARTED;
+
 out:
	mutex_unlock(&ctx->status_mutex);
return rc;
diff --git