[PATCH v2 0/2] powerpc/kvm: Enable running guests on RT Linux

2015-04-24 Thread Bogdan Purcareata
This patchset enables running KVM SMP guests with external interrupts on an
underlying RT-enabled Linux. Previous to this patch, a guest with in-kernel MPIC
emulation could easily panic the kernel due to preemption when delivering IPIs
and external interrupts, because of the openpic spinlock becoming a sleeping
mutex on PREEMPT_RT_FULL Linux.

0001: converts the openpic spinlock to a raw spinlock, in order to circumvent
this behavior. While this change is targeted for a RT enabled Linux, it has no
effect on upstream kvm-ppc, so send it upstream for better future maintenance.

0002: disables in-kernel MPIC emulation for guests running on RT, in order to
prevent a potential DoS attack due to large system latencies. This patch is
targeted to RT (due to CONFIG_PREEMPT_RT_FULL), but it can also be applied on
upstream Linux, with no effect.

- applied & compiled against vanilla 4.0
- applied & compiled against stable-rt 3.18-rt

v2:
- updated commit messages
- change the fix for potentially large latencies from limiting the max number of
  VCPUs a guest can have to disabling the in-kernel MPIC

Bogdan Purcareata (2):
  powerpc/kvm: Convert openpic lock to raw_spinlock
  powerpc/kvm: Disable in-kernel MPIC emulation for PREEMPT_RT_FULL

 arch/powerpc/kvm/Kconfig |  1 +
 arch/powerpc/kvm/mpic.c  | 44 ++--
 2 files changed, 23 insertions(+), 22 deletions(-)

-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 1/2] powerpc/kvm: Convert openpic lock to raw_spinlock

2015-04-24 Thread Bogdan Purcareata
The lock in the KVM openpic emulation on PPC is a spinlock_t, meaning it becomes
a sleeping mutex under PREEMPT_RT_FULL. This leads to a situation where this
non-raw lock is grabbed with interrupts already disabled by hard_irq_disable():

kvmppc_prepare_to_enter()
  hard_irq_disable()
  kvmppc_core_prepare_to_enter()
    kvmppc_core_check_exceptions()
      kvmppc_booke_irqprio_deliver()
        kvmppc_mpic_set_epr()
          spin_lock_irqsave()
          ...

This happens for guest interrupts that go through this openpic emulation code.
The result is a kernel crash on guest enter (include/linux/kvm_host.h:784).

Converting the lock to a raw_spinlock fixes the issue and enables the guest to
run I/O intensive workloads in a SMP configuration. A similar fix can be found
for the i8254 PIT emulation on x86 [1].

[1] https://lkml.org/lkml/2010/1/11/289

v2:
- updated commit message

Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com
---
 arch/powerpc/kvm/mpic.c | 44 ++--
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index 6249cdc..2f70660 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -196,7 +196,7 @@ struct openpic {
int num_mmio_regions;
 
gpa_t reg_base;
-   spinlock_t lock;
+   raw_spinlock_t lock;
 
/* Behavior control */
struct fsl_mpic_info *fsl;
@@ -1103,9 +1103,9 @@ static int openpic_cpu_write_internal(void *opaque, gpa_t 
addr,
mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
}
 
-   spin_unlock(opp-lock);
+   raw_spin_unlock(opp-lock);
kvm_notify_acked_irq(opp-kvm, 0, notify_eoi);
-   spin_lock(opp-lock);
+   raw_spin_lock(opp-lock);
 
break;
}
@@ -1180,12 +1180,12 @@ void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
int cpu = vcpu-arch.irq_cpu_id;
unsigned long flags;
 
-   spin_lock_irqsave(opp-lock, flags);
+   raw_spin_lock_irqsave(opp-lock, flags);
 
if ((opp-gcr  opp-mpic_mode_mask) == GCR_MODE_PROXY)
kvmppc_set_epr(vcpu, openpic_iack(opp, opp-dst[cpu], cpu));
 
-   spin_unlock_irqrestore(opp-lock, flags);
+   raw_spin_unlock_irqrestore(opp-lock, flags);
 }
 
 static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
@@ -1386,9 +1386,9 @@ static int kvm_mpic_read(struct kvm_vcpu *vcpu,
return -EINVAL;
}
 
-   spin_lock_irq(opp-lock);
+   raw_spin_lock_irq(opp-lock);
ret = kvm_mpic_read_internal(opp, addr - opp-reg_base, u.val);
-   spin_unlock_irq(opp-lock);
+   raw_spin_unlock_irq(opp-lock);
 
/*
 * Technically only 32-bit accesses are allowed, but be nice to
@@ -1427,10 +1427,10 @@ static int kvm_mpic_write(struct kvm_vcpu *vcpu,
return -EOPNOTSUPP;
}
 
-   spin_lock_irq(opp-lock);
+   raw_spin_lock_irq(opp-lock);
ret = kvm_mpic_write_internal(opp, addr - opp-reg_base,
  *(const u32 *)ptr);
-   spin_unlock_irq(opp-lock);
+   raw_spin_unlock_irq(opp-lock);
 
pr_debug(%s: addr %llx ret %d val %x\n,
 __func__, addr, ret, *(const u32 *)ptr);
@@ -1501,14 +1501,14 @@ static int access_reg(struct openpic *opp, gpa_t addr, 
u32 *val, int type)
if (addr  3)
return -ENXIO;
 
-   spin_lock_irq(opp-lock);
+   raw_spin_lock_irq(opp-lock);
 
if (type == ATTR_SET)
ret = kvm_mpic_write_internal(opp, addr, *val);
else
ret = kvm_mpic_read_internal(opp, addr, val);
 
-   spin_unlock_irq(opp-lock);
+   raw_spin_unlock_irq(opp-lock);
 
pr_debug(%s: type %d addr %llx val %x\n, __func__, type, addr, *val);
 
@@ -1545,9 +1545,9 @@ static int mpic_set_attr(struct kvm_device *dev, struct 
kvm_device_attr *attr)
if (attr32 != 0  attr32 != 1)
return -EINVAL;
 
-   spin_lock_irq(opp-lock);
+   raw_spin_lock_irq(opp-lock);
openpic_set_irq(opp, attr-attr, attr32);
-   spin_unlock_irq(opp-lock);
+   raw_spin_unlock_irq(opp-lock);
return 0;
}
 
@@ -1592,9 +1592,9 @@ static int mpic_get_attr(struct kvm_device *dev, struct 
kvm_device_attr *attr)
if (attr-attr  MAX_SRC)
return -EINVAL;
 
-   spin_lock_irq(opp-lock);
+   raw_spin_lock_irq(opp-lock);
attr32 = opp-src[attr-attr].pending;
-   spin_unlock_irq(opp-lock);
+   raw_spin_unlock_irq(opp-lock);
 
if (put_user(attr32, (u32 __user *)(long)attr-addr))
return -EFAULT;
@@ -1670,7 +1670,7 @@ static int mpic_create(struct kvm_device *dev, u32 type)
opp-kvm = dev-kvm

[PATCH v2 2/2] powerpc/kvm: Disable in-kernel MPIC emulation for PREEMPT_RT_FULL

2015-04-24 Thread Bogdan Purcareata
While converting the openpic emulation code to use a raw_spinlock_t enables
guests to run on RT, there's still a performance issue. For interrupts sent in
directed delivery mode with a multiple CPU mask, the emulated openpic will loop
through all of the VCPUs, and for each VCPU, it calls IRQ_check, which will loop
through all the pending interrupts for that VCPU. This is done while holding the
raw_lock, meaning that in all this time the interrupts and preemption are
disabled on the host Linux. A malicious user app can max out both of these
numbers and cause a DoS.

This temporary fix is sent for two reasons. First is so that users who want to
use the in-kernel MPIC emulation are aware of the potential latencies, thus
making sure that the hardware MPIC and their usage scenario do not involve
interrupts sent in directed delivery mode, and the number of possible pending
interrupts is kept small. Secondly, this should incentivize the development of a
proper openpic emulation that would be better suited for RT.

Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com
---
 arch/powerpc/kvm/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 11850f3..415499a 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -158,6 +158,7 @@ config KVM_E500MC
 config KVM_MPIC
bool KVM in-kernel MPIC emulation
depends on KVM  E500
+   depends on !PREEMPT_RT_FULL
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQFD
select HAVE_KVM_IRQ_ROUTING
-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] powerpc/mpic: Remove WHOAMI readback after EOI

2015-03-24 Thread Bogdan Purcareata
After previous discussions regarding the subject [1][2], there's no clear
explanation or reason why the call was needed in the first place. The sensible
argument is some sort of synchronization between the CPU and the MPIC, which
hasn't been pointed out precisely and is no longer required (at least on BookE
platforms).

The benefit of this change is saving a MMIO trap per interrupt when running in a
KVM guest.

[1] https://patchwork.ozlabs.org/patch/429098/
[2] https://patchwork.ozlabs.org/patch/433557/

Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com
---
 arch/powerpc/sysdev/mpic.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index bbfbbf2..045e72a9 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -655,7 +655,6 @@ static inline struct mpic * mpic_from_irq_data(struct 
irq_data *d)
 static inline void mpic_eoi(struct mpic *mpic)
 {
mpic_cpu_write(MPIC_INFO(CPU_EOI), 0);
-   (void)mpic_cpu_read(MPIC_INFO(CPU_WHOAMI));
 }
 
 /*
-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 3/3] powerpc: Enable HAVE_ARCH_SECCOMP_FILTER

2015-02-18 Thread Bogdan Purcareata
Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com
---
 arch/powerpc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 22b0940..2588b57 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -104,6 +104,7 @@ config PPC
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_LITTLE_ENDIAN
select HAVE_KPROBES
select HAVE_ARCH_KGDB
+   select HAVE_ARCH_SECCOMP_FILTER
select HAVE_KRETPROBES
select HAVE_ARCH_TRACEHOOK
select HAVE_MEMBLOCK
-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 1/3] powerpc: Don't force ENOSYS as error on syscall fail

2015-02-18 Thread Bogdan Purcareata
In certain scenarios - e.g. seccomp filtering with ERRNO as default action -
the system call fails for other reasons than the syscall not being available.
The seccomp filter can be configured to store a user-defined error code on
return from a blacklisted syscall. Don't always set ENOSYS on
do_syscall_trace_enter failure.

Delegate setting ENOSYS in case of failure, where appropriate, to
do_syscall_trace_enter.

v4:
- update syscall_exit to be local label on 64bit, after rebasing on top of 3.19

v3:
- keep setting ENOSYS in the syscall entry assembly for scenarios without
  syscall tracing

v2:
- move setting ENOSYS as errno from the syscall entry assembly to
  do_syscall_trace_enter, only in the specific case

Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com
---
 arch/powerpc/kernel/entry_32.S | 7 ++-
 arch/powerpc/kernel/entry_64.S | 5 +++--
 arch/powerpc/kernel/ptrace.c   | 4 +++-
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 46fc0f4..b2f88cd 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -333,12 +333,12 @@ _GLOBAL(DoSyscall)
lwz r11,TI_FLAGS(r10)
andi.   r11,r11,_TIF_SYSCALL_DOTRACE
bne-syscall_dotrace
-syscall_dotrace_cont:
cmplwi  0,r0,NR_syscalls
lis r10,sys_call_table@h
ori r10,r10,sys_call_table@l
slwir0,r0,2
bge-66f
+syscall_dotrace_cont:
lwzxr10,r10,r0  /* Fetch system call handler [ptr] */
mtlrr10
addir9,r1,STACK_FRAME_OVERHEAD
@@ -457,6 +457,11 @@ syscall_dotrace:
lwz r7,GPR7(r1)
lwz r8,GPR8(r1)
REST_NVGPRS(r1)
+   cmplwi  0,r0,NR_syscalls
+   lis r10,sys_call_table@h
+   ori r10,r10,sys_call_table@l
+   slwir0,r0,2
+   bge-ret_from_syscall
b   syscall_dotrace_cont
 
 syscall_exit_work:
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index d180caf2..5e7434e 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -144,7 +144,6 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
ld  r10,TI_FLAGS(r11)
andi.   r11,r10,_TIF_SYSCALL_DOTRACE
bne syscall_dotrace
-.Lsyscall_dotrace_cont:
cmpldi  0,r0,NR_syscalls
bge-syscall_enosys
 
@@ -253,7 +252,9 @@ syscall_dotrace:
addir9,r1,STACK_FRAME_OVERHEAD
CURRENT_THREAD_INFO(r10, r1)
ld  r10,TI_FLAGS(r10)
-   b   .Lsyscall_dotrace_cont
+   cmpldi  0,r0,NR_syscalls
+   bge-.Lsyscall_exit
+   b   system_call
 
 syscall_enosys:
li  r3,-ENOSYS
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index f21897b..2edae06 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -1775,13 +1775,15 @@ long do_syscall_trace_enter(struct pt_regs *regs)
secure_computing_strict(regs-gpr[0]);
 
if (test_thread_flag(TIF_SYSCALL_TRACE) 
-   tracehook_report_syscall_entry(regs))
+   tracehook_report_syscall_entry(regs)) {
/*
 * Tracing decided this syscall should not happen.
 * We'll return a bogus call number to get an ENOSYS
 * error, but leave the original number in regs-gpr[0].
 */
ret = -1L;
+   syscall_set_return_value(current, regs, ENOSYS, 0);
+   }
 
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs-gpr[0]);
-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 2/3] powerpc: Relax secure computing on syscall entry trace

2015-02-18 Thread Bogdan Purcareata
The secure_computing_strict will just force the kernel to panic on
secure_computing failure. Once SECCOMP_FILTER support is enabled in the kernel,
syscalls can be denied without system failure.

v4:
- rebase on top of 3.19

v3,v2: no changes

Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com
---
 arch/powerpc/kernel/ptrace.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 2edae06..cb9fd33 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -1772,7 +1772,9 @@ long do_syscall_trace_enter(struct pt_regs *regs)
 
user_exit();
 
-   secure_computing_strict(regs-gpr[0]);
+   /* Do the secure computing check first; failures should be fast. */
+   if (secure_computing() == -1)
+   return -1L;
 
if (test_thread_flag(TIF_SYSCALL_TRACE) 
tracehook_report_syscall_entry(regs)) {
-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v4 0/3] powerpc: Enable seccomp filter support

2015-02-18 Thread Bogdan Purcareata
Add the missing pieces in order to enable SECCOMP_FILTER on PowerPC
architectures, and enable this support.

Testing has been pursued using libseccomp with the latest ppc support patches
[1][2], on Freescale platforms for both ppc and ppc64. Support on ppc64le has
also been tested, courtesy of Mike Strosaker.

[1] https://groups.google.com/forum/#!topic/libseccomp/oz42LfMDsxg
[2] https://groups.google.com/forum/#!topic/libseccomp/TQWfCt_nD7c

v4:
- rebased on top of 3.19

v3:
- keep setting ENOSYS in syscall entry assembly when syscall tracing is disabled

v2:
- move setting ENOSYS from syscall entry assembly to do_syscall_trace_enter

Bogdan Purcareata (3):
  powerpc: Don't force ENOSYS as error on syscall fail
  powerpc: Relax secure computing on syscall entry trace
  powerpc: Enable HAVE_ARCH_SECCOMP_FILTER

 arch/powerpc/Kconfig   | 1 +
 arch/powerpc/kernel/entry_32.S | 7 ++-
 arch/powerpc/kernel/entry_64.S | 5 +++--
 arch/powerpc/kernel/ptrace.c   | 8 ++--
 4 files changed, 16 insertions(+), 5 deletions(-)

-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 2/2] powerpc/kvm: Limit MAX_VCPUS for guests running on RT Linux

2015-02-18 Thread Bogdan Purcareata
Due to the introduction of the raw_spinlock for the KVM openpic, guests with a
high number of VCPUs may induce great latencies on the underlying RT Linux
system (e.g. cyclictest reports latencies of ~15ms for guests with 24 VCPUs).
This can be further aggravated by sending a lot of external interrupts to the
guest.

A malicious app can abuse this scenario, causing a DoS of the host Linux.
Until the KVM openpic code is refactored to use finer lock granularity, impose
a limitation on the number of VCPUs a guest can have when running on a
PREEMPT_RT_FULL system with KVM_MPIC emulation.

Signed-off-by: Mihai Caraman mihai.cara...@freescale.com
Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com
Reviewed-by: Scott Wood scottw...@freescale.com
---
 arch/powerpc/include/asm/kvm_host.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 8ef0512..6f6b928 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -36,8 +36,14 @@
 #include asm/cacheflush.h
 #include asm/hvcall.h
 
+#if defined(CONFIG_PREEMPT_RT_FULL)  defined(CONFIG_KVM_MPIC)
+/* Limit the number of vcpus due to in-kernel mpic concurrency */
+#define KVM_MAX_VCPUS  4
+#define KVM_MAX_VCORES 4
+#else
 #define KVM_MAX_VCPUS  NR_CPUS
 #define KVM_MAX_VCORES NR_CPUS
+#endif
 #define KVM_USER_MEM_SLOTS 32
 #define KVM_MEM_SLOTS_NUM KVM_USER_MEM_SLOTS
 
-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 1/2] powerpc/kvm: Convert openpic lock to raw_spinlock

2015-02-18 Thread Bogdan Purcareata
This patch enables running intensive I/O workloads, e.g. netperf, in a guest
deployed on a RT host. It also enables guests to be SMP.

The openpic spinlock becomes a sleeping mutex on a RT system. This no longer
guarantees that EPR is atomic with exception delivery. The guest VCPU thread
fails due to a BUG_ON(preemptible()) when running netperf.

In order to make the kvmppc_mpic_set_epr() call safe on RT from non-atomic
context, convert the openpic lock to a raw_spinlock. A similar approach can
be seen for x86 platforms in the following commit [1].

Here are some comparative cyclictest measurements run inside a high priority RT
guest run on a RT host. The guest has 1 VCPU and the test has been run for 15
minutes. The guest runs ~750 hackbench processes as background stress.

                  spinlock  raw_spinlock
Min latency (us)  4         4
Avg latency (us)  15        19
Max latency (us)  70        62

[1] https://lkml.org/lkml/2010/1/11/289

Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com
Reviewed-by: Scott Wood scottw...@freescale.com
---
 arch/powerpc/kvm/mpic.c | 44 ++--
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c
index 39b3a8f..9fad0aa 100644
--- a/arch/powerpc/kvm/mpic.c
+++ b/arch/powerpc/kvm/mpic.c
@@ -196,7 +196,7 @@ struct openpic {
int num_mmio_regions;
 
gpa_t reg_base;
-   spinlock_t lock;
+   raw_spinlock_t lock;
 
/* Behavior control */
struct fsl_mpic_info *fsl;
@@ -1108,9 +1108,9 @@ static int openpic_cpu_write_internal(void *opaque, gpa_t 
addr,
mpic_irq_raise(opp, dst, ILR_INTTGT_INT);
}
 
-   spin_unlock(opp-lock);
+   raw_spin_unlock(opp-lock);
kvm_notify_acked_irq(opp-kvm, 0, notify_eoi);
-   spin_lock(opp-lock);
+   raw_spin_lock(opp-lock);
 
break;
}
@@ -1185,12 +1185,12 @@ void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu)
int cpu = vcpu-arch.irq_cpu_id;
unsigned long flags;
 
-   spin_lock_irqsave(opp-lock, flags);
+   raw_spin_lock_irqsave(opp-lock, flags);
 
if ((opp-gcr  opp-mpic_mode_mask) == GCR_MODE_PROXY)
kvmppc_set_epr(vcpu, openpic_iack(opp, opp-dst[cpu], cpu));
 
-   spin_unlock_irqrestore(opp-lock, flags);
+   raw_spin_unlock_irqrestore(opp-lock, flags);
 }
 
 static int openpic_cpu_read_internal(void *opaque, gpa_t addr,
@@ -1390,9 +1390,9 @@ static int kvm_mpic_read(struct kvm_io_device *this, 
gpa_t addr,
return -EINVAL;
}
 
-   spin_lock_irq(opp-lock);
+   raw_spin_lock_irq(opp-lock);
ret = kvm_mpic_read_internal(opp, addr - opp-reg_base, u.val);
-   spin_unlock_irq(opp-lock);
+   raw_spin_unlock_irq(opp-lock);
 
/*
 * Technically only 32-bit accesses are allowed, but be nice to
@@ -1430,10 +1430,10 @@ static int kvm_mpic_write(struct kvm_io_device *this, 
gpa_t addr,
return -EOPNOTSUPP;
}
 
-   spin_lock_irq(opp-lock);
+   raw_spin_lock_irq(opp-lock);
ret = kvm_mpic_write_internal(opp, addr - opp-reg_base,
  *(const u32 *)ptr);
-   spin_unlock_irq(opp-lock);
+   raw_spin_unlock_irq(opp-lock);
 
pr_debug(%s: addr %llx ret %d val %x\n,
 __func__, addr, ret, *(const u32 *)ptr);
@@ -1504,14 +1504,14 @@ static int access_reg(struct openpic *opp, gpa_t addr, 
u32 *val, int type)
if (addr  3)
return -ENXIO;
 
-   spin_lock_irq(opp-lock);
+   raw_spin_lock_irq(opp-lock);
 
if (type == ATTR_SET)
ret = kvm_mpic_write_internal(opp, addr, *val);
else
ret = kvm_mpic_read_internal(opp, addr, val);
 
-   spin_unlock_irq(opp-lock);
+   raw_spin_unlock_irq(opp-lock);
 
pr_debug(%s: type %d addr %llx val %x\n, __func__, type, addr, *val);
 
@@ -1548,9 +1548,9 @@ static int mpic_set_attr(struct kvm_device *dev, struct 
kvm_device_attr *attr)
if (attr32 != 0  attr32 != 1)
return -EINVAL;
 
-   spin_lock_irq(opp-lock);
+   raw_spin_lock_irq(opp-lock);
openpic_set_irq(opp, attr-attr, attr32);
-   spin_unlock_irq(opp-lock);
+   raw_spin_unlock_irq(opp-lock);
return 0;
}
 
@@ -1595,9 +1595,9 @@ static int mpic_get_attr(struct kvm_device *dev, struct 
kvm_device_attr *attr)
if (attr-attr  MAX_SRC)
return -EINVAL;
 
-   spin_lock_irq(opp-lock);
+   raw_spin_lock_irq(opp-lock);
attr32 = opp-src[attr-attr].pending;
-   spin_unlock_irq(opp-lock);
+   raw_spin_unlock_irq(opp-lock);
 
if (put_user(attr32, (u32 __user *)(long)attr-addr

[PATCH 0/2] powerpc/kvm: Enable running guests on RT Linux

2015-02-18 Thread Bogdan Purcareata
This patchset enables running KVM SMP guests with external interrupts on an
underlying RT-enabled Linux. Previous to this patch, a guest with in-kernel MPIC
emulation could easily panic the kernel due to preemption when delivering IPIs
and external interrupts, because of the openpic spinlock becoming a sleeping
mutex on PREEMPT_RT_FULL Linux.

0001: converts the openpic spinlock to a raw spinlock, in order to circumvent
this behavior. While this change is targeted for a RT enabled Linux, it has no
effect on upstream kvm-ppc, so send it upstream for better future maintenance.

0002: introduces a limit on the maximum VCPUs a guest can have, in order to
prevent potential DoS attack due to large system latencies. This patch is
targeted to RT (due to CONFIG_PREEMPT_RT_FULL), but it can also be applied on
upstream Linux, with no effect. Not sure if it's best to send it upstream and
have a hanging CONFIG_PREEMPT_RT_FULL check there, with no effect, or send it
against linux-stable-rt. Please apply as you consider appropriate.

- applied & compiled against upstream 3.19
- applied & compiled against stable-rt 3.14-rt (0002 with minor fuzz)

Bogdan Purcareata (2):
  powerpc/kvm: Convert openpic lock to raw_spinlock
  powerpc/kvm: Limit MAX_VCPUS for guests running on RT Linux

 arch/powerpc/include/asm/kvm_host.h |  6 +
 arch/powerpc/kvm/mpic.c | 44 ++---
 2 files changed, 28 insertions(+), 22 deletions(-)

-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 0/3] powerpc: Enable seccomp filter support

2015-02-13 Thread Bogdan Purcareata
Add the missing pieces in order to enable SECCOMP_FILTER on PowerPC
architectures, and enable this support.

Testing has been pursued using libseccomp with the latest ppc support patches
[1][2], on Freescale platforms for both ppc and ppc64. ppc64le support is
untested.

[1] https://groups.google.com/forum/#!topic/libseccomp/oz42LfMDsxg
[2] https://groups.google.com/forum/#!topic/libseccomp/TQWfCt_nD7c

v3:
- keep setting ENOSYS in syscall entry assembly when syscall tracing is disabled

v2:
- move setting ENOSYS from syscall entry assembly to do_syscall_trace_enter

Bogdan Purcareata (3):
  powerpc: Don't force ENOSYS as error on syscall fail
  powerpc: Relax secure computing on syscall entry trace
  powerpc: Enable HAVE_ARCH_SECCOMP_FILTER

 arch/powerpc/Kconfig   | 1 +
 arch/powerpc/kernel/entry_32.S | 7 ++-
 arch/powerpc/kernel/entry_64.S | 5 +++--
 arch/powerpc/kernel/ptrace.c   | 8 ++--
 4 files changed, 16 insertions(+), 5 deletions(-)

-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 2/3] powerpc: Relax secure computing on syscall entry trace

2015-02-13 Thread Bogdan Purcareata
The secure_computing_strict will just force the kernel to panic on
secure_computing failure. Once SECCOMP_FILTER support is enabled in the kernel,
syscalls can be denied without system failure.

Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com
---
 arch/powerpc/kernel/ptrace.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 2edae06..285e056 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -1772,7 +1772,9 @@ long do_syscall_trace_enter(struct pt_regs *regs)
 
user_exit();
 
-   secure_computing_strict(regs-gpr[0]);
+   /* Do the secure computing check first; failures should be fast. */
+   if (secure_computing(regs-gpr[0]) == -1)
+   return -1L;
 
if (test_thread_flag(TIF_SYSCALL_TRACE) 
tracehook_report_syscall_entry(regs)) {
-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 3/3] powerpc: Enable HAVE_ARCH_SECCOMP_FILTER

2015-02-13 Thread Bogdan Purcareata
Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com
---
 arch/powerpc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 22b0940..2588b57 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -104,6 +104,7 @@ config PPC
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_LITTLE_ENDIAN
select HAVE_KPROBES
select HAVE_ARCH_KGDB
+   select HAVE_ARCH_SECCOMP_FILTER
select HAVE_KRETPROBES
select HAVE_ARCH_TRACEHOOK
select HAVE_MEMBLOCK
-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v3 1/3] powerpc: Don't force ENOSYS as error on syscall fail

2015-02-13 Thread Bogdan Purcareata
In certain scenarios - e.g. seccomp filtering with ERRNO as default action -
the system call fails for other reasons than the syscall not being available.
The seccomp filter can be configured to store a user-defined error code on
return from a blacklisted syscall. Don't always set ENOSYS on
do_syscall_trace_enter failure.

Delegate setting ENOSYS in case of failure, where appropriate, to
do_syscall_trace_enter.

v3:
- keep setting ENOSYS in the syscall entry assembly for scenarios without
  syscall tracing

v2:
- move setting ENOSYS as errno from the syscall entry assembly to
  do_syscall_trace_enter, only in the specific case

Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com
---
 arch/powerpc/kernel/entry_32.S | 7 ++-
 arch/powerpc/kernel/entry_64.S | 5 +++--
 arch/powerpc/kernel/ptrace.c   | 4 +++-
 3 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 46fc0f4..b2f88cd 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -333,12 +333,12 @@ _GLOBAL(DoSyscall)
lwz r11,TI_FLAGS(r10)
andi.   r11,r11,_TIF_SYSCALL_DOTRACE
bne-syscall_dotrace
-syscall_dotrace_cont:
cmplwi  0,r0,NR_syscalls
lis r10,sys_call_table@h
ori r10,r10,sys_call_table@l
slwir0,r0,2
bge-66f
+syscall_dotrace_cont:
lwzxr10,r10,r0  /* Fetch system call handler [ptr] */
mtlrr10
addir9,r1,STACK_FRAME_OVERHEAD
@@ -457,6 +457,11 @@ syscall_dotrace:
lwz r7,GPR7(r1)
lwz r8,GPR8(r1)
REST_NVGPRS(r1)
+   cmplwi  0,r0,NR_syscalls
+   lis r10,sys_call_table@h
+   ori r10,r10,sys_call_table@l
+   slwir0,r0,2
+   bge-ret_from_syscall
b   syscall_dotrace_cont
 
 syscall_exit_work:
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index d180caf2..0d22fa8 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -144,7 +144,6 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
ld  r10,TI_FLAGS(r11)
andi.   r11,r10,_TIF_SYSCALL_DOTRACE
bne syscall_dotrace
-.Lsyscall_dotrace_cont:
cmpldi  0,r0,NR_syscalls
bge-syscall_enosys
 
@@ -253,7 +252,9 @@ syscall_dotrace:
addir9,r1,STACK_FRAME_OVERHEAD
CURRENT_THREAD_INFO(r10, r1)
ld  r10,TI_FLAGS(r10)
-   b   .Lsyscall_dotrace_cont
+   cmpldi  0,r0,NR_syscalls
+   bge-syscall_exit
+   b   system_call
 
 syscall_enosys:
li  r3,-ENOSYS
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index f21897b..2edae06 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -1775,13 +1775,15 @@ long do_syscall_trace_enter(struct pt_regs *regs)
secure_computing_strict(regs-gpr[0]);
 
if (test_thread_flag(TIF_SYSCALL_TRACE) 
-   tracehook_report_syscall_entry(regs))
+   tracehook_report_syscall_entry(regs)) {
/*
 * Tracing decided this syscall should not happen.
 * We'll return a bogus call number to get an ENOSYS
 * error, but leave the original number in regs-gpr[0].
 */
ret = -1L;
+   syscall_set_return_value(current, regs, ENOSYS, 0);
+   }
 
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs-gpr[0]);
-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 3/3] powerpc: Enable HAVE_ARCH_SECCOMP_FILTER

2015-02-11 Thread Bogdan Purcareata
Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com
---
 arch/powerpc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a2a168e..72f363e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -104,6 +104,7 @@ config PPC
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_LITTLE_ENDIAN
select HAVE_KPROBES
select HAVE_ARCH_KGDB
+   select HAVE_ARCH_SECCOMP_FILTER
select HAVE_KRETPROBES
select HAVE_ARCH_TRACEHOOK
select HAVE_MEMBLOCK
-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 2/3] powerpc: Relax secure computing on syscall entry trace

2015-02-11 Thread Bogdan Purcareata
The secure_computing_strict will just force the kernel to panic on
secure_computing failure. Once SECCOMP_FILTER support is enabled in the kernel,
syscalls can be denied without system failure.

Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com
---
 arch/powerpc/kernel/ptrace.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index d82fd0b..d41faab 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -1772,7 +1772,9 @@ long do_syscall_trace_enter(struct pt_regs *regs)
 
user_exit();
 
-   secure_computing_strict(regs-gpr[0]);
+   /* Do the secure computing check first; failures should be fast. */
+   if (secure_computing(regs-gpr[0]) == -1)
+   return -1L;
 
if (test_thread_flag(TIF_SYSCALL_TRACE) 
tracehook_report_syscall_entry(regs)) {
-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 0/3] powerpc: Enable seccomp filter support

2015-02-11 Thread Bogdan Purcareata
Add the missing pieces in order to enable SECCOMP_FILTER on PowerPC
architectures, and enable this support.

Testing has been pursued using libseccomp with the latest ppc support patches
[1], on Freescale platforms for both ppc and ppc64. ppc64le support is
untested.

[1] https://groups.google.com/forum/#!topic/libseccomp/ktR-bQr1tGw

v2:
- move setting ENOSYS from syscall entry assembly to do_syscall_trace_enter

Bogdan Purcareata (3):
  powerpc: Don't force ENOSYS as error on syscall fail
  powerpc: Relax secure computing on syscall entry trace
  powerpc: Enable HAVE_ARCH_SECCOMP_FILTER

 arch/powerpc/Kconfig   | 1 +
 arch/powerpc/kernel/entry_32.S | 2 +-
 arch/powerpc/kernel/entry_64.S | 1 -
 arch/powerpc/kernel/ptrace.c   | 8 ++--
 4 files changed, 8 insertions(+), 4 deletions(-)

-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2 1/3] powerpc: Don't force ENOSYS as error on syscall fail

2015-02-11 Thread Bogdan Purcareata
In certain scenarios - e.g. seccomp filtering with ERRNO as default action -
the system call fails for other reasons than the syscall not being available.
The seccomp filter can be configured to store a user-defined error code on
return from a blacklisted syscall. Don't always set ENOSYS on
do_syscall_trace_enter failure.

v2:
- move setting ENOSYS as errno from the syscall entry assembly to
  do_syscall_trace_enter, only in the specific case

Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com
---
 arch/powerpc/kernel/entry_32.S | 2 +-
 arch/powerpc/kernel/entry_64.S | 1 -
 arch/powerpc/kernel/ptrace.c   | 4 +++-
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 10a0935..d2c58a3 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -425,7 +425,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
b   1b
 #endif  /* CONFIG_44x */
 
-66:li  r3,-ENOSYS
+66:
b   ret_from_syscall
 
.globl  ret_from_fork
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 194e46d..0111e04 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -269,7 +269,6 @@ syscall_dotrace:
b   .Lsyscall_dotrace_cont
 
 syscall_enosys:
-   li  r3,-ENOSYS
b   syscall_exit

 syscall_exit_work:
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index f21897b..d82fd0b 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -1775,13 +1775,15 @@ long do_syscall_trace_enter(struct pt_regs *regs)
secure_computing_strict(regs->gpr[0]);
 
if (test_thread_flag(TIF_SYSCALL_TRACE) &&
-   tracehook_report_syscall_entry(regs))
+   tracehook_report_syscall_entry(regs)) {
/*
 * Tracing decided this syscall should not happen.
 * We'll return a bogus call number to get an ENOSYS
 * error, but leave the original number in regs->gpr[0].
 */
ret = -1L;
+   syscall_set_return_value(current, regs, -ENOSYS, 0);
+   }
 
if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT)))
trace_sys_enter(regs, regs->gpr[0]);
-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 0/3] powerpc: Enable seccomp filter support

2015-02-09 Thread Bogdan Purcareata
Add the missing pieces in order to enable SECCOMP_FILTER on PowerPC
architectures, and enable this support.

Testing was done using libseccomp with the latest ppc support patches
[1], on Freescale platforms for both ppc and ppc64. ppc64le support is
untested.

[1] https://groups.google.com/forum/#!topic/libseccomp/ktR-bQr1tGw

Bogdan Purcareata (3):
  powerpc: Don't force ENOSYS as error on syscall fail
  powerpc: Relax secure computing on syscall entry trace
  powerpc: Enable HAVE_ARCH_SECCOMP_FILTER

 arch/powerpc/Kconfig   | 1 +
 arch/powerpc/kernel/entry_32.S | 3 ++-
 arch/powerpc/kernel/entry_64.S | 2 +-
 arch/powerpc/kernel/ptrace.c   | 4 +++-
 4 files changed, 7 insertions(+), 3 deletions(-)

-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH 2/3] powerpc: Relax secure computing on syscall entry trace

2015-02-09 Thread Bogdan Purcareata
The secure_computing_strict will just force the kernel to panic on
secure_computing failure. Once SECCOMP_FILTER support is enabled in the kernel,
syscalls can be denied without system failure.

Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com
---
 arch/powerpc/kernel/ptrace.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index 100e01c..5c654ac 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -1772,7 +1772,9 @@ long do_syscall_trace_enter(struct pt_regs *regs)
 
user_exit();
 
-   secure_computing_strict(regs->gpr[0]);
+   /* Do the secure computing check first; failures should be fast. */
+   if (secure_computing(regs->gpr[0]) == -1)
+   return -1L;
 
if (test_thread_flag(TIF_SYSCALL_TRACE) &&
tracehook_report_syscall_entry(regs))
-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] powerpc: Enable HAVE_ARCH_SECCOMP_FILTER

2015-02-09 Thread Bogdan Purcareata
Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com
---
 arch/powerpc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index a2a168e..72f363e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -104,6 +104,7 @@ config PPC
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_LITTLE_ENDIAN
select HAVE_KPROBES
select HAVE_ARCH_KGDB
+   select HAVE_ARCH_SECCOMP_FILTER
select HAVE_KRETPROBES
select HAVE_ARCH_TRACEHOOK
select HAVE_MEMBLOCK
-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[RFC][PATCH 1/3] powerpc: Don't force ENOSYS as error on syscall fail

2015-02-08 Thread Bogdan Purcareata
In certain scenarios - e.g. seccomp filtering with ERRNO as default action -
the system call fails for reasons other than the syscall not being available.
The seccomp filter can be configured to store a user-defined error code on
return from a blacklisted syscall.

The RFC is this: are there currently any user-space scenarios where it is
required that the system call return ENOSYS as error code on failure, no matter
the circumstances? I don't want to break userspace requirements. I have not
added code to force this error code in situations different than
secure_computing failure, in order to keep overhead at a minimum.

Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com
---
 arch/powerpc/kernel/entry_32.S | 3 ++-
 arch/powerpc/kernel/entry_64.S | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 59848e5..52e48dd 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -425,7 +425,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX)
b   1b
 #endif  /* CONFIG_44x */
 
-66:li  r3,-ENOSYS
+66:
+#  li  r3,-ENOSYS
b   ret_from_syscall
 
.globl  ret_from_fork
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index e6bfe8e..80db02e 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -269,7 +269,7 @@ syscall_dotrace:
b   .Lsyscall_dotrace_cont
 
 syscall_enosys:
-   li  r3,-ENOSYS
+#  li  r3,-ENOSYS
b   syscall_exit

 syscall_exit_work:
-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH v2] powerpc/mpic: Add DT option to skip readback after EOI

2015-01-27 Thread Bogdan Purcareata
The readback acts as a synchronization mechanism in handling external
interrupts, making sure the core waits until EOI write completion. This is
required in certain scenarios, such as when the MPIC communicates with a PCI
device in posted write mode. If the device uses legacy interrupts, and the CPU
returns from the interrupt as soon as it fires the EOI write, there is a chance
to receive spurious interrupts because the line isn't deasserted yet.

This doesn't happen in an emulated environment, e.g. KVM openpic, therefore the
readback is not required. In order to satisfy both cases, make the readback
optional and configurable through the device tree.

Skipping the readback saves a MMIO trap per interrupt.

v2: updated commit message

Signed-off-by: Scott Wood scottw...@freescale.com
[add DT binding, update commit message]
Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com
---
 Documentation/devicetree/bindings/powerpc/fsl/mpic.txt | 13 +
 arch/powerpc/include/asm/mpic.h|  2 ++
 arch/powerpc/sysdev/mpic.c |  8 +++-
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt b/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt
index dc57446..9789094 100644
--- a/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt
+++ b/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt
@@ -77,6 +77,19 @@ PROPERTIES
   in the global feature registers.  If specified, this field will
   override the value read from MPIC_GREG_FEATURE_LAST_SRC.
 
+  - mpic-eoi-no-readback
+  Usage: optional
+  Value type: empty
+  Definition: The presence of this property specifies that the
+  MPIC will not issue a readback when delivering the EOI for an
+  external interrupt. The readback operation is done by reading
+  the CPU WHOAMI register after writing to the CPU EOI register.
+  Originally, this was required due to the fact that the MPIC
+  operates at lower frequencies, or in scenarios where the MPIC
+  is connected through PCI with write posting. This is not the
+  case in an emulated environment (e.g. KVM guest), or in scenarios
+  where interrupts are not handled in a loop of get_irq() calls.
+
 INTERRUPT SPECIFIER DEFINITION
 
   Interrupt specifiers consists of 4 cells encoded as
diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h
index 754f93d..e2a4146 100644
--- a/arch/powerpc/include/asm/mpic.h
+++ b/arch/powerpc/include/asm/mpic.h
@@ -386,6 +386,8 @@ extern struct bus_type mpic_subsys;
  * from the BRR1 register).
 */
 #define MPIC_FSL_HAS_EIMR  0x0001
+/* Dont bother with readback after MPIC EOI */
+#define MPIC_EOI_NO_READBACK   0x0002
 
 /* MPIC HW modification ID */
 #define MPIC_REGSET_MASK   0xf000
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index f3e8624..431f68e 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -656,7 +656,9 @@ static inline struct mpic * mpic_from_irq_data(struct irq_data *d)
 static inline void mpic_eoi(struct mpic *mpic)
 {
mpic_cpu_write(MPIC_INFO(CPU_EOI), 0);
-   (void)mpic_cpu_read(MPIC_INFO(CPU_WHOAMI));
+
+   if (!(mpic->flags & MPIC_EOI_NO_READBACK))
+   (void)mpic_cpu_read(MPIC_INFO(CPU_WHOAMI));
 }
 
 /*
@@ -1290,6 +1292,10 @@ struct mpic * __init mpic_alloc(struct device_node *node,
flags |= MPIC_SINGLE_DEST_CPU;
if (of_device_is_compatible(node, "fsl,mpic"))
flags |= MPIC_FSL | MPIC_LARGE_VECTORS;
+   if (of_get_property(node, "mpic-eoi-no-readback", NULL)) {
+   pr_debug("mpic: no readback activated");
+   flags |= MPIC_EOI_NO_READBACK;
+   }
 
mpic = kzalloc(sizeof(struct mpic), GFP_KERNEL);
if (mpic == NULL)
-- 
2.1.4

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev

[PATCH] powerpc/mpic: Add DT option to skip readback after EOI

2015-01-14 Thread Bogdan Purcareata
The readback is necessary in order to handle PCI posted
writes, or when the MPIC is handling interrupts in a loop
(ppc_md.get_irq). Newer MPIC versions don't require this
readback. Leave the option configurable using a device
tree entry.

This saves a MMIO trap per interrupt.

Signed-off-by: Scott Wood scottw...@freescale.com
Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com
---
 Documentation/devicetree/bindings/powerpc/fsl/mpic.txt | 13 +
 arch/powerpc/include/asm/mpic.h|  2 ++
 arch/powerpc/sysdev/mpic.c |  8 +++-
 3 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt b/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt
index dc57446..9789094 100644
--- a/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt
+++ b/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt
@@ -77,6 +77,19 @@ PROPERTIES
   in the global feature registers.  If specified, this field will
   override the value read from MPIC_GREG_FEATURE_LAST_SRC.
 
+  - mpic-eoi-no-readback
+  Usage: optional
+  Value type: empty
+  Definition: The presence of this property specifies that the
+  MPIC will not issue a readback when delivering the EOI for an
+  external interrupt. The readback operation is done by reading
+  the CPU WHOAMI register after writing to the CPU EOI register.
+  Originally, this was required due to the fact that the MPIC
+  operates at lower frequencies, or in scenarios where the MPIC
+  is connected through PCI with write posting. This is not the
+  case in an emulated environment (e.g. KVM guest), or in scenarios
+  where interrupts are not handled in a loop of get_irq() calls.
+
 INTERRUPT SPECIFIER DEFINITION
 
   Interrupt specifiers consists of 4 cells encoded as
diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h
index 754f93d..e2a4146 100644
--- a/arch/powerpc/include/asm/mpic.h
+++ b/arch/powerpc/include/asm/mpic.h
@@ -386,6 +386,8 @@ extern struct bus_type mpic_subsys;
  * from the BRR1 register).
 */
 #define MPIC_FSL_HAS_EIMR  0x0001
+/* Dont bother with readback after MPIC EOI */
+#define MPIC_EOI_NO_READBACK   0x0002
 
 /* MPIC HW modification ID */
 #define MPIC_REGSET_MASK   0xf000
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index f3e8624..431f68e 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -656,7 +656,9 @@ static inline struct mpic * mpic_from_irq_data(struct irq_data *d)
 static inline void mpic_eoi(struct mpic *mpic)
 {
mpic_cpu_write(MPIC_INFO(CPU_EOI), 0);
-   (void)mpic_cpu_read(MPIC_INFO(CPU_WHOAMI));
+
+   if (!(mpic->flags & MPIC_EOI_NO_READBACK))
+   (void)mpic_cpu_read(MPIC_INFO(CPU_WHOAMI));
 }
 
 /*
@@ -1290,6 +1292,10 @@ struct mpic * __init mpic_alloc(struct device_node *node,
flags |= MPIC_SINGLE_DEST_CPU;
if (of_device_is_compatible(node, "fsl,mpic"))
flags |= MPIC_FSL | MPIC_LARGE_VECTORS;
+   if (of_get_property(node, "mpic-eoi-no-readback", NULL)) {
+   pr_debug("mpic: no readback activated");
+   flags |= MPIC_EOI_NO_READBACK;
+   }
 
mpic = kzalloc(sizeof(struct mpic), GFP_KERNEL);
if (mpic == NULL)
-- 
2.1.3

___
Linuxppc-dev mailing list
Linuxppc-dev@lists.ozlabs.org
https://lists.ozlabs.org/listinfo/linuxppc-dev