[PATCH v2 0/2] powerpc/kvm: Enable running guests on RT Linux
This patchset enables running KVM SMP guests with external interrupts on an underlying RT-enabled Linux. Prior to this patch, a guest with in-kernel MPIC emulation could easily panic the kernel due to preemption when delivering IPIs and external interrupts, because of the openpic spinlock becoming a sleeping mutex on PREEMPT_RT_FULL Linux. 0001: converts the openpic spinlock to a raw spinlock, in order to circumvent this behavior. While this change is targeted for an RT-enabled Linux, it has no effect on upstream kvm-ppc, so send it upstream for better future maintenance. 0002: disables in-kernel MPIC emulation for guests running on RT, in order to prevent a potential DoS attack due to large system latencies. This patch is targeted to RT (due to CONFIG_PREEMPT_RT_FULL), but it can also be applied on upstream Linux, with no effect. - applied and compiled against vanilla 4.0 - applied and compiled against stable-rt 3.18-rt v2: - updated commit messages - change the fix for potentially large latencies from limiting the max number of VCPUs a guest can have to disabling the in-kernel MPIC Bogdan Purcareata (2): powerpc/kvm: Convert openpic lock to raw_spinlock powerpc/kvm: Disable in-kernel MPIC emulation for PREEMPT_RT_FULL arch/powerpc/kvm/Kconfig | 1 + arch/powerpc/kvm/mpic.c | 44 ++-- 2 files changed, 23 insertions(+), 22 deletions(-) -- 2.1.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 1/2] powerpc/kvm: Convert openpic lock to raw_spinlock
The lock in the KVM openpic emulation on PPC is a spinlock_t, meaning it becomes a sleeping mutex under PREEMPT_RT_FULL. This leads to a situation where this non-raw lock is grabbed with interrupts already disabled by hard_irq_disable(): kvmppc_prepare_to_enter() hard_irq_disable() kvmppc_core_prepare_to_enter() kvmppc_core_check_exceptions() kvmppc_booke_irqprio_deliver() kvmppc_mpic_set_epr() spin_lock_irqsave() ... This happens for guest interrupts that go through this openpic emulation code. The result is a kernel crash on guest enter (include/linux/kvm_host.h:784). Converting the lock to a raw_spinlock fixes the issue and enables the guest to run I/O intensive workloads in an SMP configuration. A similar fix can be found for the i8254 PIT emulation on x86 [1]. [1] https://lkml.org/lkml/2010/1/11/289 v2: - updated commit message Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com --- arch/powerpc/kvm/mpic.c | 44 ++-- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index 6249cdc..2f70660 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -196,7 +196,7 @@ struct openpic { int num_mmio_regions; gpa_t reg_base; - spinlock_t lock; + raw_spinlock_t lock; /* Behavior control */ struct fsl_mpic_info *fsl; @@ -1103,9 +1103,9 @@ static int openpic_cpu_write_internal(void *opaque, gpa_t addr, mpic_irq_raise(opp, dst, ILR_INTTGT_INT); } - spin_unlock(opp-lock); + raw_spin_unlock(opp-lock); kvm_notify_acked_irq(opp-kvm, 0, notify_eoi); - spin_lock(opp-lock); + raw_spin_lock(opp-lock); break; } @@ -1180,12 +1180,12 @@ void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu) int cpu = vcpu-arch.irq_cpu_id; unsigned long flags; - spin_lock_irqsave(opp-lock, flags); + raw_spin_lock_irqsave(opp-lock, flags); if ((opp-gcr opp-mpic_mode_mask) == GCR_MODE_PROXY) kvmppc_set_epr(vcpu, openpic_iack(opp, opp-dst[cpu], cpu)); - spin_unlock_irqrestore(opp-lock, flags); + 
raw_spin_unlock_irqrestore(opp-lock, flags); } static int openpic_cpu_read_internal(void *opaque, gpa_t addr, @@ -1386,9 +1386,9 @@ static int kvm_mpic_read(struct kvm_vcpu *vcpu, return -EINVAL; } - spin_lock_irq(opp-lock); + raw_spin_lock_irq(opp-lock); ret = kvm_mpic_read_internal(opp, addr - opp-reg_base, u.val); - spin_unlock_irq(opp-lock); + raw_spin_unlock_irq(opp-lock); /* * Technically only 32-bit accesses are allowed, but be nice to @@ -1427,10 +1427,10 @@ static int kvm_mpic_write(struct kvm_vcpu *vcpu, return -EOPNOTSUPP; } - spin_lock_irq(opp-lock); + raw_spin_lock_irq(opp-lock); ret = kvm_mpic_write_internal(opp, addr - opp-reg_base, *(const u32 *)ptr); - spin_unlock_irq(opp-lock); + raw_spin_unlock_irq(opp-lock); pr_debug(%s: addr %llx ret %d val %x\n, __func__, addr, ret, *(const u32 *)ptr); @@ -1501,14 +1501,14 @@ static int access_reg(struct openpic *opp, gpa_t addr, u32 *val, int type) if (addr 3) return -ENXIO; - spin_lock_irq(opp-lock); + raw_spin_lock_irq(opp-lock); if (type == ATTR_SET) ret = kvm_mpic_write_internal(opp, addr, *val); else ret = kvm_mpic_read_internal(opp, addr, val); - spin_unlock_irq(opp-lock); + raw_spin_unlock_irq(opp-lock); pr_debug(%s: type %d addr %llx val %x\n, __func__, type, addr, *val); @@ -1545,9 +1545,9 @@ static int mpic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) if (attr32 != 0 attr32 != 1) return -EINVAL; - spin_lock_irq(opp-lock); + raw_spin_lock_irq(opp-lock); openpic_set_irq(opp, attr-attr, attr32); - spin_unlock_irq(opp-lock); + raw_spin_unlock_irq(opp-lock); return 0; } @@ -1592,9 +1592,9 @@ static int mpic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) if (attr-attr MAX_SRC) return -EINVAL; - spin_lock_irq(opp-lock); + raw_spin_lock_irq(opp-lock); attr32 = opp-src[attr-attr].pending; - spin_unlock_irq(opp-lock); + raw_spin_unlock_irq(opp-lock); if (put_user(attr32, (u32 __user *)(long)attr-addr)) return -EFAULT; @@ -1670,7 +1670,7 @@ static int mpic_create(struct 
kvm_device *dev, u32 type) opp-kvm = dev-kvm
[PATCH v2 2/2] powerpc/kvm: Disable in-kernel MPIC emulation for PREEMPT_RT_FULL
While converting the openpic emulation code to use a raw_spinlock_t enables guests to run on RT, there's still a performance issue. For interrupts sent in directed delivery mode with a multiple-CPU mask, the emulated openpic will loop through all of the VCPUs, and for each VCPU, it calls IRQ_check, which will loop through all the pending interrupts for that VCPU. This is done while holding the raw_lock, meaning that during all this time interrupts and preemption are disabled on the host Linux. A malicious user app can max out both of these numbers and cause a DoS. This temporary fix is sent for two reasons. First is so that users who want to use the in-kernel MPIC emulation are aware of the potential latencies, thus making sure that the hardware MPIC and their usage scenario does not involve interrupts sent in directed delivery mode, and the number of possible pending interrupts is kept small. Secondly, this should incentivize the development of a proper openpic emulation that would be better suited for RT. Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com --- arch/powerpc/kvm/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 11850f3..415499a 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -158,6 +158,7 @@ config KVM_E500MC config KVM_MPIC bool "KVM in-kernel MPIC emulation" depends on KVM && E500 + depends on !PREEMPT_RT_FULL select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQFD select HAVE_KVM_IRQ_ROUTING -- 2.1.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc/mpic: Remove WHOAMI readback after EOI
After previous discussions regarding the subject [1][2], there's no clear explanation or reason why the call was needed in the first place. The sensible argument is some sort of synchronization between the CPU and the MPIC, which hasn't been pointed out precisely and is no longer required (at least on BookE platforms). The benefit of this change is saving an MMIO trap per interrupt when running in a KVM guest. [1] https://patchwork.ozlabs.org/patch/429098/ [2] https://patchwork.ozlabs.org/patch/433557/ Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com --- arch/powerpc/sysdev/mpic.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index bbfbbf2..045e72a9 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -655,7 +655,6 @@ static inline struct mpic * mpic_from_irq_data(struct irq_data *d) static inline void mpic_eoi(struct mpic *mpic) { mpic_cpu_write(MPIC_INFO(CPU_EOI), 0); - (void)mpic_cpu_read(MPIC_INFO(CPU_WHOAMI)); } /* -- 2.1.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v4 3/3] powerpc: Enable HAVE_ARCH_SECCOMP_FILTER
Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com --- arch/powerpc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 22b0940..2588b57 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -104,6 +104,7 @@ config PPC select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_LITTLE_ENDIAN select HAVE_KPROBES select HAVE_ARCH_KGDB + select HAVE_ARCH_SECCOMP_FILTER select HAVE_KRETPROBES select HAVE_ARCH_TRACEHOOK select HAVE_MEMBLOCK -- 2.1.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v4 1/3] powerpc: Don't force ENOSYS as error on syscall fail
In certain scenarios - e.g. seccomp filtering with ERRNO as default action - the system call fails for other reasons than the syscall not being available. The seccomp filter can be configured to store a user-defined error code on return from a blacklisted syscall. Don't always set ENOSYS on do_syscall_trace_enter failure. Delegate setting ENOSYS in case of failure, where appropriate, to do_syscall_trace_enter. v4: - update syscall_exit to be local label on 64bit, after rebasing on top of 3.19 v3: - keep setting ENOSYS in the syscall entry assembly for scenarios without syscall tracing v2: - move setting ENOSYS as errno from the syscall entry assembly to do_syscall_trace_enter, only in the specific case Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com --- arch/powerpc/kernel/entry_32.S | 7 ++- arch/powerpc/kernel/entry_64.S | 5 +++-- arch/powerpc/kernel/ptrace.c | 4 +++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 46fc0f4..b2f88cd 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -333,12 +333,12 @@ _GLOBAL(DoSyscall) lwz r11,TI_FLAGS(r10) andi. 
r11,r11,_TIF_SYSCALL_DOTRACE bne-syscall_dotrace -syscall_dotrace_cont: cmplwi 0,r0,NR_syscalls lis r10,sys_call_table@h ori r10,r10,sys_call_table@l slwir0,r0,2 bge-66f +syscall_dotrace_cont: lwzxr10,r10,r0 /* Fetch system call handler [ptr] */ mtlrr10 addir9,r1,STACK_FRAME_OVERHEAD @@ -457,6 +457,11 @@ syscall_dotrace: lwz r7,GPR7(r1) lwz r8,GPR8(r1) REST_NVGPRS(r1) + cmplwi 0,r0,NR_syscalls + lis r10,sys_call_table@h + ori r10,r10,sys_call_table@l + slwir0,r0,2 + bge-ret_from_syscall b syscall_dotrace_cont syscall_exit_work: diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index d180caf2..5e7434e 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -144,7 +144,6 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) ld r10,TI_FLAGS(r11) andi. r11,r10,_TIF_SYSCALL_DOTRACE bne syscall_dotrace -.Lsyscall_dotrace_cont: cmpldi 0,r0,NR_syscalls bge-syscall_enosys @@ -253,7 +252,9 @@ syscall_dotrace: addir9,r1,STACK_FRAME_OVERHEAD CURRENT_THREAD_INFO(r10, r1) ld r10,TI_FLAGS(r10) - b .Lsyscall_dotrace_cont + cmpldi 0,r0,NR_syscalls + bge-.Lsyscall_exit + b system_call syscall_enosys: li r3,-ENOSYS diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index f21897b..2edae06 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1775,13 +1775,15 @@ long do_syscall_trace_enter(struct pt_regs *regs) secure_computing_strict(regs-gpr[0]); if (test_thread_flag(TIF_SYSCALL_TRACE) - tracehook_report_syscall_entry(regs)) + tracehook_report_syscall_entry(regs)) { /* * Tracing decided this syscall should not happen. * We'll return a bogus call number to get an ENOSYS * error, but leave the original number in regs-gpr[0]. 
*/ ret = -1L; + syscall_set_return_value(current, regs, ENOSYS, 0); + } if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs-gpr[0]); -- 2.1.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v4 2/3] powerpc: Relax secure computing on syscall entry trace
The secure_computing_strict will just force the kernel to panic on secure_computing failure. Once SECCOMP_FILTER support is enabled in the kernel, syscalls can be denied without system failure. v4: - rebase on top of 3.19 v3,v2: no changes Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com --- arch/powerpc/kernel/ptrace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 2edae06..cb9fd33 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1772,7 +1772,9 @@ long do_syscall_trace_enter(struct pt_regs *regs) user_exit(); - secure_computing_strict(regs-gpr[0]); + /* Do the secure computing check first; failures should be fast. */ + if (secure_computing() == -1) + return -1L; if (test_thread_flag(TIF_SYSCALL_TRACE) tracehook_report_syscall_entry(regs)) { -- 2.1.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v4 0/3] powerpc: Enable seccomp filter support
Add the missing pieces in order to enable SECCOMP_FILTER on PowerPC architectures, and enable this support. Testing has been pursued using libseccomp with the latest ppc support patches [1][2], on Freescale platforms for both ppc and ppc64. Support on ppc64le has also been tested, courtesy of Mike Strosaker. [1] https://groups.google.com/forum/#!topic/libseccomp/oz42LfMDsxg [2] https://groups.google.com/forum/#!topic/libseccomp/TQWfCt_nD7c v4: - rebased on top of 3.19 v3: - keep setting ENOSYS in syscall entry assembly when syscall tracing is disabled v2: - move setting ENOSYS from syscall entry assembly to do_syscall_trace_enter Bogdan Purcareata (3): powerpc: Don't force ENOSYS as error on syscall fail powerpc: Relax secure computing on syscall entry trace powerpc: Enable HAVE_ARCH_SECCOMP_FILTER arch/powerpc/Kconfig | 1 + arch/powerpc/kernel/entry_32.S | 7 ++- arch/powerpc/kernel/entry_64.S | 5 +++-- arch/powerpc/kernel/ptrace.c | 8 ++-- 4 files changed, 16 insertions(+), 5 deletions(-) -- 2.1.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 2/2] powerpc/kvm: Limit MAX_VCPUS for guests running on RT Linux
Due to the introduction of the raw_spinlock for the KVM openpic, guests with a high number of VCPUs may induce great latencies on the underlying RT Linux system (e.g. cyclictest reports latencies of ~15ms for guests with 24 VCPUs). This can be further aggravated by sending a lot of external interrupts to the guest. A malicious app can abuse this scenario, causing a DoS of the host Linux. Until the KVM openpic code is refactored to use finer lock granularity, impose a limitation on the number of VCPUs a guest can have when running on a PREEMPT_RT_FULL system with KVM_MPIC emulation. Signed-off-by: Mihai Caraman mihai.cara...@freescale.com Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com Reviewed-by: Scott Wood scottw...@freescale.com --- arch/powerpc/include/asm/kvm_host.h | 6 ++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 8ef0512..6f6b928 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -36,8 +36,14 @@ #include <asm/cacheflush.h> #include <asm/hvcall.h> +#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_KVM_MPIC) +/* Limit the number of vcpus due to in-kernel mpic concurrency */ +#define KVM_MAX_VCPUS 4 +#define KVM_MAX_VCORES 4 +#else #define KVM_MAX_VCPUS NR_CPUS #define KVM_MAX_VCORES NR_CPUS +#endif #define KVM_USER_MEM_SLOTS 32 #define KVM_MEM_SLOTS_NUM KVM_USER_MEM_SLOTS -- 2.1.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 1/2] powerpc/kvm: Convert openpic lock to raw_spinlock
This patch enables running intensive I/O workloads, e.g. netperf, in a guest deployed on an RT host. It also enables guests to be SMP. The openpic spinlock becomes a sleeping mutex on an RT system. This no longer guarantees that EPR is atomic with exception delivery. The guest VCPU thread fails due to a BUG_ON(preemptible()) when running netperf. In order to make the kvmppc_mpic_set_epr() call safe on RT from non-atomic context, convert the openpic lock to a raw_spinlock. A similar approach can be seen for x86 platforms in the following commit [1]. Here are some comparative cyclictest measurements run inside a high priority RT guest running on an RT host. The guest has 1 VCPU and the test has been run for 15 minutes. The guest runs ~750 hackbench processes as background stress. spinlock raw_spinlock Min latency (us) 4 4 Avg latency (us) 15 19 Max latency (us) 70 62 [1] https://lkml.org/lkml/2010/1/11/289 Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com Reviewed-by: Scott Wood scottw...@freescale.com --- arch/powerpc/kvm/mpic.c | 44 ++-- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index 39b3a8f..9fad0aa 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -196,7 +196,7 @@ struct openpic { int num_mmio_regions; gpa_t reg_base; - spinlock_t lock; + raw_spinlock_t lock; /* Behavior control */ struct fsl_mpic_info *fsl; @@ -1108,9 +1108,9 @@ static int openpic_cpu_write_internal(void *opaque, gpa_t addr, mpic_irq_raise(opp, dst, ILR_INTTGT_INT); } - spin_unlock(opp-lock); + raw_spin_unlock(opp-lock); kvm_notify_acked_irq(opp-kvm, 0, notify_eoi); - spin_lock(opp-lock); + raw_spin_lock(opp-lock); break; } @@ -1185,12 +1185,12 @@ void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu) int cpu = vcpu-arch.irq_cpu_id; unsigned long flags; - spin_lock_irqsave(opp-lock, flags); + raw_spin_lock_irqsave(opp-lock, flags); if ((opp-gcr opp-mpic_mode_mask) == GCR_MODE_PROXY) kvmppc_set_epr(vcpu, 
openpic_iack(opp, opp-dst[cpu], cpu)); - spin_unlock_irqrestore(opp-lock, flags); + raw_spin_unlock_irqrestore(opp-lock, flags); } static int openpic_cpu_read_internal(void *opaque, gpa_t addr, @@ -1390,9 +1390,9 @@ static int kvm_mpic_read(struct kvm_io_device *this, gpa_t addr, return -EINVAL; } - spin_lock_irq(opp-lock); + raw_spin_lock_irq(opp-lock); ret = kvm_mpic_read_internal(opp, addr - opp-reg_base, u.val); - spin_unlock_irq(opp-lock); + raw_spin_unlock_irq(opp-lock); /* * Technically only 32-bit accesses are allowed, but be nice to @@ -1430,10 +1430,10 @@ static int kvm_mpic_write(struct kvm_io_device *this, gpa_t addr, return -EOPNOTSUPP; } - spin_lock_irq(opp-lock); + raw_spin_lock_irq(opp-lock); ret = kvm_mpic_write_internal(opp, addr - opp-reg_base, *(const u32 *)ptr); - spin_unlock_irq(opp-lock); + raw_spin_unlock_irq(opp-lock); pr_debug(%s: addr %llx ret %d val %x\n, __func__, addr, ret, *(const u32 *)ptr); @@ -1504,14 +1504,14 @@ static int access_reg(struct openpic *opp, gpa_t addr, u32 *val, int type) if (addr 3) return -ENXIO; - spin_lock_irq(opp-lock); + raw_spin_lock_irq(opp-lock); if (type == ATTR_SET) ret = kvm_mpic_write_internal(opp, addr, *val); else ret = kvm_mpic_read_internal(opp, addr, val); - spin_unlock_irq(opp-lock); + raw_spin_unlock_irq(opp-lock); pr_debug(%s: type %d addr %llx val %x\n, __func__, type, addr, *val); @@ -1548,9 +1548,9 @@ static int mpic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) if (attr32 != 0 attr32 != 1) return -EINVAL; - spin_lock_irq(opp-lock); + raw_spin_lock_irq(opp-lock); openpic_set_irq(opp, attr-attr, attr32); - spin_unlock_irq(opp-lock); + raw_spin_unlock_irq(opp-lock); return 0; } @@ -1595,9 +1595,9 @@ static int mpic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr) if (attr-attr MAX_SRC) return -EINVAL; - spin_lock_irq(opp-lock); + raw_spin_lock_irq(opp-lock); attr32 = opp-src[attr-attr].pending; - spin_unlock_irq(opp-lock); + raw_spin_unlock_irq(opp-lock); if 
(put_user(attr32, (u32 __user *)(long)attr-addr
[PATCH 0/2] powerpc/kvm: Enable running guests on RT Linux
This patchset enables running KVM SMP guests with external interrupts on an underlying RT-enabled Linux. Prior to this patch, a guest with in-kernel MPIC emulation could easily panic the kernel due to preemption when delivering IPIs and external interrupts, because of the openpic spinlock becoming a sleeping mutex on PREEMPT_RT_FULL Linux. 0001: converts the openpic spinlock to a raw spinlock, in order to circumvent this behavior. While this change is targeted for an RT-enabled Linux, it has no effect on upstream kvm-ppc, so send it upstream for better future maintenance. 0002: introduces a limit on the maximum VCPUs a guest can have, in order to prevent a potential DoS attack due to large system latencies. This patch is targeted to RT (due to CONFIG_PREEMPT_RT_FULL), but it can also be applied on upstream Linux, with no effect. Not sure if it's best to send it upstream and have a hanging CONFIG_PREEMPT_RT_FULL check there, with no effect, or send it against linux-stable-rt. Please apply as you consider appropriate. - applied and compiled against upstream 3.19 - applied and compiled against stable-rt 3.14-rt (0002 with minor fuzz) Bogdan Purcareata (2): powerpc/kvm: Convert openpic lock to raw_spinlock powerpc/kvm: Limit MAX_VCPUS for guests running on RT Linux arch/powerpc/include/asm/kvm_host.h | 6 + arch/powerpc/kvm/mpic.c | 44 ++--- 2 files changed, 28 insertions(+), 22 deletions(-) -- 2.1.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 0/3] powerpc: Enable seccomp filter support
Add the missing pieces in order to enable SECCOMP_FILTER on PowerPC architectures, and enable this support. Testing has been pursued using libseccomp with the latest ppc support patches [1][2], on Freescale platforms for both ppc and ppc64. ppc64le support is untested. [1] https://groups.google.com/forum/#!topic/libseccomp/oz42LfMDsxg [2] https://groups.google.com/forum/#!topic/libseccomp/TQWfCt_nD7c v3: - keep setting ENOSYS in syscall entry assembly when syscall tracing is disabled v2: - move setting ENOSYS from syscall entry assembly to do_syscall_trace_enter Bogdan Purcareata (3): powerpc: Don't force ENOSYS as error on syscall fail powerpc: Relax secure computing on syscall entry trace powerpc: Enable HAVE_ARCH_SECCOMP_FILTER arch/powerpc/Kconfig | 1 + arch/powerpc/kernel/entry_32.S | 7 ++- arch/powerpc/kernel/entry_64.S | 5 +++-- arch/powerpc/kernel/ptrace.c | 8 ++-- 4 files changed, 16 insertions(+), 5 deletions(-) -- 2.1.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 2/3] powerpc: Relax secure computing on syscall entry trace
The secure_computing_strict will just force the kernel to panic on secure_computing failure. Once SECCOMP_FILTER support is enabled in the kernel, syscalls can be denied without system failure. Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com --- arch/powerpc/kernel/ptrace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 2edae06..285e056 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1772,7 +1772,9 @@ long do_syscall_trace_enter(struct pt_regs *regs) user_exit(); - secure_computing_strict(regs-gpr[0]); + /* Do the secure computing check first; failures should be fast. */ + if (secure_computing(regs-gpr[0]) == -1) + return -1L; if (test_thread_flag(TIF_SYSCALL_TRACE) tracehook_report_syscall_entry(regs)) { -- 2.1.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 3/3] powerpc: Enable HAVE_ARCH_SECCOMP_FILTER
Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com --- arch/powerpc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 22b0940..2588b57 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -104,6 +104,7 @@ config PPC select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_LITTLE_ENDIAN select HAVE_KPROBES select HAVE_ARCH_KGDB + select HAVE_ARCH_SECCOMP_FILTER select HAVE_KRETPROBES select HAVE_ARCH_TRACEHOOK select HAVE_MEMBLOCK -- 2.1.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v3 1/3] powerpc: Don't force ENOSYS as error on syscall fail
In certain scenarios - e.g. seccomp filtering with ERRNO as default action - the system call fails for other reasons than the syscall not being available. The seccomp filter can be configured to store a user-defined error code on return from a blacklisted syscall. Don't always set ENOSYS on do_syscall_trace_enter failure. Delegate setting ENOSYS in case of failure, where appropriate, to do_syscall_trace_enter. v3: - keep setting ENOSYS in the syscall entry assembly for scenarios without syscall tracing v2: - move setting ENOSYS as errno from the syscall entry assembly to do_syscall_trace_enter, only in the specific case Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com --- arch/powerpc/kernel/entry_32.S | 7 ++- arch/powerpc/kernel/entry_64.S | 5 +++-- arch/powerpc/kernel/ptrace.c | 4 +++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 46fc0f4..b2f88cd 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -333,12 +333,12 @@ _GLOBAL(DoSyscall) lwz r11,TI_FLAGS(r10) andi. r11,r11,_TIF_SYSCALL_DOTRACE bne-syscall_dotrace -syscall_dotrace_cont: cmplwi 0,r0,NR_syscalls lis r10,sys_call_table@h ori r10,r10,sys_call_table@l slwir0,r0,2 bge-66f +syscall_dotrace_cont: lwzxr10,r10,r0 /* Fetch system call handler [ptr] */ mtlrr10 addir9,r1,STACK_FRAME_OVERHEAD @@ -457,6 +457,11 @@ syscall_dotrace: lwz r7,GPR7(r1) lwz r8,GPR8(r1) REST_NVGPRS(r1) + cmplwi 0,r0,NR_syscalls + lis r10,sys_call_table@h + ori r10,r10,sys_call_table@l + slwir0,r0,2 + bge-ret_from_syscall b syscall_dotrace_cont syscall_exit_work: diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index d180caf2..0d22fa8 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -144,7 +144,6 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR) ld r10,TI_FLAGS(r11) andi. 
r11,r10,_TIF_SYSCALL_DOTRACE bne syscall_dotrace -.Lsyscall_dotrace_cont: cmpldi 0,r0,NR_syscalls bge-syscall_enosys @@ -253,7 +252,9 @@ syscall_dotrace: addir9,r1,STACK_FRAME_OVERHEAD CURRENT_THREAD_INFO(r10, r1) ld r10,TI_FLAGS(r10) - b .Lsyscall_dotrace_cont + cmpldi 0,r0,NR_syscalls + bge-syscall_exit + b system_call syscall_enosys: li r3,-ENOSYS diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index f21897b..2edae06 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1775,13 +1775,15 @@ long do_syscall_trace_enter(struct pt_regs *regs) secure_computing_strict(regs-gpr[0]); if (test_thread_flag(TIF_SYSCALL_TRACE) - tracehook_report_syscall_entry(regs)) + tracehook_report_syscall_entry(regs)) { /* * Tracing decided this syscall should not happen. * We'll return a bogus call number to get an ENOSYS * error, but leave the original number in regs-gpr[0]. */ ret = -1L; + syscall_set_return_value(current, regs, ENOSYS, 0); + } if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs-gpr[0]); -- 2.1.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 3/3] powerpc: Enable HAVE_ARCH_SECCOMP_FILTER
Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com --- arch/powerpc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a2a168e..72f363e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -104,6 +104,7 @@ config PPC select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_LITTLE_ENDIAN select HAVE_KPROBES select HAVE_ARCH_KGDB + select HAVE_ARCH_SECCOMP_FILTER select HAVE_KRETPROBES select HAVE_ARCH_TRACEHOOK select HAVE_MEMBLOCK -- 2.1.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 2/3] powerpc: Relax secure computing on syscall entry trace
The secure_computing_strict will just force the kernel to panic on secure_computing failure. Once SECCOMP_FILTER support is enabled in the kernel, syscalls can be denied without system failure. Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com --- arch/powerpc/kernel/ptrace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index d82fd0b..d41faab 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1772,7 +1772,9 @@ long do_syscall_trace_enter(struct pt_regs *regs) user_exit(); - secure_computing_strict(regs-gpr[0]); + /* Do the secure computing check first; failures should be fast. */ + if (secure_computing(regs-gpr[0]) == -1) + return -1L; if (test_thread_flag(TIF_SYSCALL_TRACE) tracehook_report_syscall_entry(regs)) { -- 2.1.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 0/3] powerpc: Enable seccomp filter support
Add the missing pieces in order to enable SECCOMP_FILTER on PowerPC architectures, and enable this support. Testing has been pursued using libseccomp with the latest ppc support patches [1], on Freescale platforms for both ppc and ppc64. ppc64le support is untested. [1] https://groups.google.com/forum/#!topic/libseccomp/ktR-bQr1tGw v2: - move setting ENOSYS from syscall entry assembly to do_syscall_trace_enter Bogdan Purcareata (3): powerpc: Don't force ENOSYS as error on syscall fail powerpc: Relax secure computing on syscall entry trace powerpc: Enable HAVE_ARCH_SECCOMP_FILTER arch/powerpc/Kconfig | 1 + arch/powerpc/kernel/entry_32.S | 2 +- arch/powerpc/kernel/entry_64.S | 1 - arch/powerpc/kernel/ptrace.c | 8 ++-- 4 files changed, 8 insertions(+), 4 deletions(-) -- 2.1.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2 1/3] powerpc: Don't force ENOSYS as error on syscall fail
In certain scenarios - e.g. seccomp filtering with ERRNO as default action - the system call fails for reasons other than the syscall not being available. The seccomp filter can be configured to store a user-defined error code on return from a blacklisted syscall. Don't always set ENOSYS on do_syscall_trace_enter failure. v2: - move setting ENOSYS as errno from the syscall entry assembly to do_syscall_trace_enter, only in the specific case Signed-off-by: Bogdan Purcareata <bogdan.purcare...@freescale.com> --- arch/powerpc/kernel/entry_32.S | 2 +- arch/powerpc/kernel/entry_64.S | 1 - arch/powerpc/kernel/ptrace.c | 4 +++- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 10a0935..d2c58a3 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -425,7 +425,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) b 1b #endif /* CONFIG_44x */ -66:li r3,-ENOSYS +66: b ret_from_syscall .globl ret_from_fork diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 194e46d..0111e04 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -269,7 +269,6 @@ syscall_dotrace: b .Lsyscall_dotrace_cont syscall_enosys: - li r3,-ENOSYS b syscall_exit syscall_exit_work: diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index f21897b..d82fd0b 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1775,13 +1775,15 @@ long do_syscall_trace_enter(struct pt_regs *regs) secure_computing_strict(regs->gpr[0]); if (test_thread_flag(TIF_SYSCALL_TRACE) && - tracehook_report_syscall_entry(regs)) + tracehook_report_syscall_entry(regs)) { /* * Tracing decided this syscall should not happen. * We'll return a bogus call number to get an ENOSYS * error, but leave the original number in regs->gpr[0].
*/ ret = -1L; + syscall_set_return_value(current, regs, -ENOSYS, 0); + } if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) trace_sys_enter(regs, regs->gpr[0]); -- 2.1.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 0/3] powerpc: Enable seccomp filter support
Add the missing pieces in order to enable SECCOMP_FILTER on PowerPC architectures, and enable this support. Testing was done using libseccomp with the latest ppc support patches [1], on Freescale platforms for both ppc and ppc64. ppc64le support is untested. [1] https://groups.google.com/forum/#!topic/libseccomp/ktR-bQr1tGw Bogdan Purcareata (3): powerpc: Don't force ENOSYS as error on syscall fail powerpc: Relax secure computing on syscall entry trace powerpc: Enable HAVE_ARCH_SECCOMP_FILTER arch/powerpc/Kconfig | 1 + arch/powerpc/kernel/entry_32.S | 3 ++- arch/powerpc/kernel/entry_64.S | 2 +- arch/powerpc/kernel/ptrace.c | 4 +++- 4 files changed, 7 insertions(+), 3 deletions(-) -- 2.1.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH 2/3] powerpc: Relax secure computing on syscall entry trace
secure_computing_strict() will just force the kernel to panic on secure_computing() failure. Once SECCOMP_FILTER support is enabled in the kernel, syscalls can be denied without system failure. Signed-off-by: Bogdan Purcareata <bogdan.purcare...@freescale.com> --- arch/powerpc/kernel/ptrace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index 100e01c..5c654ac 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -1772,7 +1772,9 @@ long do_syscall_trace_enter(struct pt_regs *regs) user_exit(); - secure_computing_strict(regs->gpr[0]); + /* Do the secure computing check first; failures should be fast. */ + if (secure_computing(regs->gpr[0]) == -1) + return -1L; if (test_thread_flag(TIF_SYSCALL_TRACE) && tracehook_report_syscall_entry(regs)) -- 2.1.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc: Enable HAVE_ARCH_SECCOMP_FILTER
Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com --- arch/powerpc/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index a2a168e..72f363e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -104,6 +104,7 @@ config PPC select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_LITTLE_ENDIAN select HAVE_KPROBES select HAVE_ARCH_KGDB + select HAVE_ARCH_SECCOMP_FILTER select HAVE_KRETPROBES select HAVE_ARCH_TRACEHOOK select HAVE_MEMBLOCK -- 2.1.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[RFC][PATCH 1/3] powerpc: Don't force ENOSYS as error on syscall fail
In certain scenarios - e.g. seccomp filtering with ERRNO as default action - the system call fails for reasons other than the syscall not being available. The seccomp filter can be configured to store a user-defined error code on return from a blacklisted syscall. The RFC is this: are there currently any user-space scenarios where it is required that the system call return ENOSYS as error code on failure, no matter the circumstances? I don't want to break userspace requirements. I have not added code to force this error code in situations other than secure_computing failure, in order to keep overhead at a minimum. Signed-off-by: Bogdan Purcareata <bogdan.purcare...@freescale.com> --- arch/powerpc/kernel/entry_32.S | 3 ++- arch/powerpc/kernel/entry_64.S | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 59848e5..52e48dd 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -425,7 +425,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_NEED_PAIRED_STWCX) b 1b #endif /* CONFIG_44x */ -66:li r3,-ENOSYS +66: +# li r3,-ENOSYS b ret_from_syscall .globl ret_from_fork diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index e6bfe8e..80db02e 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -269,7 +269,7 @@ syscall_dotrace: b .Lsyscall_dotrace_cont syscall_enosys: - li r3,-ENOSYS +# li r3,-ENOSYS b syscall_exit syscall_exit_work: -- 2.1.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH v2] powerpc/mpic: Add DT option to skip readback after EOI
The readback acts as a synchronization mechanism in handling external interrupts, making sure the core waits until EOI write completion. This is required in certain scenarios, such as when the MPIC communicates with a PCI device in posted write mode. If the device uses legacy interrupts, and the CPU returns from the interrupt as soon as it fires the EOI write, there is a chance to receive spurious interrupts because the line isn't deasserted yet. This doesn't happen in an emulated environment, e.g. KVM openpic, therefore the readback is not required. In order to satisfy both cases, make the readback optional and configurable through the device tree. Skipping the readback saves a MMIO trap per interrupt. v2: updated commit message Signed-off-by: Scott Wood scottw...@freescale.com [add DT binding, update commit message] Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com --- Documentation/devicetree/bindings/powerpc/fsl/mpic.txt | 13 + arch/powerpc/include/asm/mpic.h| 2 ++ arch/powerpc/sysdev/mpic.c | 8 +++- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt b/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt index dc57446..9789094 100644 --- a/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt +++ b/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt @@ -77,6 +77,19 @@ PROPERTIES in the global feature registers. If specified, this field will override the value read from MPIC_GREG_FEATURE_LAST_SRC. + - mpic-eoi-no-readback + Usage: optional + Value type: empty + Definition: The presence of this property specifies that the + MPIC will not issue a readback when delivering the EOI for an + external interrupt. The readback operation is done by reading + the CPU WHOAMI register after writing to the CPU EOI register. 
+ Originally, this was required due to the fact that the MPIC + operates at lower frequencies, or in scenarios where the MPIC + is connected through PCI with write posting. This is not the + case in an emulated environment (e.g. KVM guest), or in scenarios + where interrupts are not handled in a loop of get_irq() calls. + INTERRUPT SPECIFIER DEFINITION Interrupt specifiers consists of 4 cells encoded as diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h index 754f93d..e2a4146 100644 --- a/arch/powerpc/include/asm/mpic.h +++ b/arch/powerpc/include/asm/mpic.h @@ -386,6 +386,8 @@ extern struct bus_type mpic_subsys; * from the BRR1 register). */ #define MPIC_FSL_HAS_EIMR 0x0001 +/* Dont bother with readback after MPIC EOI */ +#define MPIC_EOI_NO_READBACK 0x0002 /* MPIC HW modification ID */ #define MPIC_REGSET_MASK 0xf000 diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index f3e8624..431f68e 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -656,7 +656,9 @@ static inline struct mpic * mpic_from_irq_data(struct irq_data *d) static inline void mpic_eoi(struct mpic *mpic) { mpic_cpu_write(MPIC_INFO(CPU_EOI), 0); - (void)mpic_cpu_read(MPIC_INFO(CPU_WHOAMI)); + + if (!(mpic->flags & MPIC_EOI_NO_READBACK)) + (void)mpic_cpu_read(MPIC_INFO(CPU_WHOAMI)); } /* @@ -1290,6 +1292,10 @@ struct mpic * __init mpic_alloc(struct device_node *node, flags |= MPIC_SINGLE_DEST_CPU; if (of_device_is_compatible(node, "fsl,mpic")) flags |= MPIC_FSL | MPIC_LARGE_VECTORS; + if (of_get_property(node, "mpic-eoi-no-readback", NULL)) { + pr_debug("mpic: no readback activated"); + flags |= MPIC_EOI_NO_READBACK; + } mpic = kzalloc(sizeof(struct mpic), GFP_KERNEL); if (mpic == NULL) -- 2.1.4 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev
[PATCH] powerpc/mpic: Add DT option to skip readback after EOI
The readback is necessary in order to handle PCI posted writes, or when the MPIC is handling interrupts in a loop (ppc_md.get_irq). Newer MPIC versions don't require this readback. Leave the option configurable using a device tree entry. This saves a MMIO trap per interrupt. Signed-off-by: Scott Wood scottw...@freescale.com Signed-off-by: Bogdan Purcareata bogdan.purcare...@freescale.com --- Documentation/devicetree/bindings/powerpc/fsl/mpic.txt | 13 + arch/powerpc/include/asm/mpic.h| 2 ++ arch/powerpc/sysdev/mpic.c | 8 +++- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt b/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt index dc57446..9789094 100644 --- a/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt +++ b/Documentation/devicetree/bindings/powerpc/fsl/mpic.txt @@ -77,6 +77,19 @@ PROPERTIES in the global feature registers. If specified, this field will override the value read from MPIC_GREG_FEATURE_LAST_SRC. + - mpic-eoi-no-readback + Usage: optional + Value type: empty + Definition: The presence of this property specifies that the + MPIC will not issue a readback when delivering the EOI for an + external interrupt. The readback operation is done by reading + the CPU WHOAMI register after writing to the CPU EOI register. + Originally, this was required due to the fact that the MPIC + operates at lower frequencies, or in scenarios where the MPIC + is connected through PCI with write posting. This is not the + case in an emulated environment (e.g. KVM guest), or in scenarios + where interrupts are not handled in a loop of get_irq() calls. 
+ INTERRUPT SPECIFIER DEFINITION Interrupt specifiers consists of 4 cells encoded as diff --git a/arch/powerpc/include/asm/mpic.h b/arch/powerpc/include/asm/mpic.h index 754f93d..e2a4146 100644 --- a/arch/powerpc/include/asm/mpic.h +++ b/arch/powerpc/include/asm/mpic.h @@ -386,6 +386,8 @@ extern struct bus_type mpic_subsys; * from the BRR1 register). */ #define MPIC_FSL_HAS_EIMR 0x0001 +/* Dont bother with readback after MPIC EOI */ +#define MPIC_EOI_NO_READBACK 0x0002 /* MPIC HW modification ID */ #define MPIC_REGSET_MASK 0xf000 diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index f3e8624..431f68e 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -656,7 +656,9 @@ static inline struct mpic * mpic_from_irq_data(struct irq_data *d) static inline void mpic_eoi(struct mpic *mpic) { mpic_cpu_write(MPIC_INFO(CPU_EOI), 0); - (void)mpic_cpu_read(MPIC_INFO(CPU_WHOAMI)); + + if (!(mpic->flags & MPIC_EOI_NO_READBACK)) + (void)mpic_cpu_read(MPIC_INFO(CPU_WHOAMI)); } /* @@ -1290,6 +1292,10 @@ struct mpic * __init mpic_alloc(struct device_node *node, flags |= MPIC_SINGLE_DEST_CPU; if (of_device_is_compatible(node, "fsl,mpic")) flags |= MPIC_FSL | MPIC_LARGE_VECTORS; + if (of_get_property(node, "mpic-eoi-no-readback", NULL)) { + pr_debug("mpic: no readback activated"); + flags |= MPIC_EOI_NO_READBACK; + } mpic = kzalloc(sizeof(struct mpic), GFP_KERNEL); if (mpic == NULL) -- 2.1.3 ___ Linuxppc-dev mailing list Linuxppc-dev@lists.ozlabs.org https://lists.ozlabs.org/listinfo/linuxppc-dev