[tip:perf/urgent] tools headers barrier: Fix arm64 tools build failure wrt smp_load_{acquire,release}
Commit-ID:  51f5fd2e4615dcdc25cd7f9d19b7b27eb9ecdac7
Gitweb:     https://git.kernel.org/tip/51f5fd2e4615dcdc25cd7f9d19b7b27eb9ecdac7
Author:     Will Deacon
AuthorDate: Wed, 31 Oct 2018 17:44:08 +
Committer:  Arnaldo Carvalho de Melo
CommitDate: Thu, 1 Nov 2018 10:07:43 -0300

tools headers barrier: Fix arm64 tools build failure wrt smp_load_{acquire,release}

Cheers for reporting this. I managed to reproduce the build failure with
gcc version 6.3.0 20170516 (Debian 6.3.0-18+deb9u1).

The code in question is the arm64 versions of smp_load_acquire() and
smp_store_release(). Unlike other architectures, these are not built
around READ_ONCE() and WRITE_ONCE() since we have instructions we can
use instead of fences. Bringing our macros up-to-date with those (i.e.
tweaking the union initialisation and using the special "uXX_alias_t"
types) appears to fix the issue for me.

Committer notes:

Testing it in the systems previously failing:

  # time dm android-ndk:r12b-arm \
            android-ndk:r15c-arm \
            debian:experimental-x-arm64 \
            ubuntu:14.04.4-x-linaro-arm64 \
            ubuntu:16.04-x-arm \
            ubuntu:16.04-x-arm64 \
            ubuntu:18.04-x-arm \
            ubuntu:18.04-x-arm64

  1 android-ndk:r12b-arm          : Ok  arm-linux-androideabi-gcc (GCC) 4.9.x 20150123 (prerelease)
  2 android-ndk:r15c-arm          : Ok  arm-linux-androideabi-gcc (GCC) 4.9.x 20150123 (prerelease)
  3 debian:experimental-x-arm64   : Ok  aarch64-linux-gnu-gcc (Debian 8.2.0-7) 8.2.0
  4 ubuntu:14.04.4-x-linaro-arm64 : Ok  aarch64-linux-gnu-gcc (Linaro GCC 5.5-2017.10) 5.5.0
  5 ubuntu:16.04-x-arm            : Ok  arm-linux-gnueabihf-gcc (Ubuntu/Linaro 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
  6 ubuntu:16.04-x-arm64          : Ok  aarch64-linux-gnu-gcc (Ubuntu/Linaro 5.4.0-6ubuntu1~16.04.9) 5.4.0 20160609
  7 ubuntu:18.04-x-arm            : Ok  arm-linux-gnueabihf-gcc (Ubuntu/Linaro 7.3.0-27ubuntu1~18.04) 7.3.0
  8 ubuntu:18.04-x-arm64          : Ok  aarch64-linux-gnu-gcc (Ubuntu/Linaro 7.3.0-27ubuntu1~18.04) 7.3.0

Reported-by: Arnaldo Carvalho de Melo
Signed-off-by: Will Deacon
Tested-by: Arnaldo Carvalho de Melo
Tested-by: Daniel Borkmann
Cc: Peter Zijlstra
Link: http://lkml.kernel.org/r/20181031174408.ga27...@arm.com
Signed-off-by: Arnaldo Carvalho de Melo
---
 tools/arch/arm64/include/asm/barrier.h | 133 +
 1 file changed, 67 insertions(+), 66 deletions(-)

diff --git a/tools/arch/arm64/include/asm/barrier.h b/tools/arch/arm64/include/asm/barrier.h
index 12835ea0e417..378c051fa177 100644
--- a/tools/arch/arm64/include/asm/barrier.h
+++ b/tools/arch/arm64/include/asm/barrier.h
@@ -14,74 +14,75 @@
 #define wmb()		asm volatile("dmb ishst" ::: "memory")
 #define rmb()		asm volatile("dmb ishld" ::: "memory")
 
-#define smp_store_release(p, v)					\
-do {								\
-	union { typeof(*p) __val; char __c[1]; } __u =		\
-		{ .__val = (__force typeof(*p)) (v) };		\
-								\
-	switch (sizeof(*p)) {					\
-	case 1:							\
-		asm volatile ("stlrb %w1, %0"			\
-				: "=Q" (*p)			\
-				: "r" (*(__u8 *)__u.__c)	\
-				: "memory");			\
-		break;						\
-	case 2:							\
-		asm volatile ("stlrh %w1, %0"			\
-				: "=Q" (*p)			\
-				: "r" (*(__u16 *)__u.__c)	\
-				: "memory");			\
-		break;						\
-	case 4:							\
-		asm volatile ("stlr %w1, %0"			\
-				: "=Q" (*p)			\
-				: "r" (*(__u32 *)__u.__c)	\
-				: "memory");			\
-		break;						\
-	case 8:							\
-		asm volatile ("stlr %1, %0"			\
-				: "=Q" (*p)			\
-				: "r" (*(__u64 *)__u.__c)	\
-				: "memory");			\
-		break;						\
-	default:						\
-		/* Only to shut up gcc ... */			\
-		mb();
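For context, a minimal userspace sketch of the store-release pattern the fix
adopts: the value is punned through a union and read back via a "may_alias"
typedef so that GCC 6's strict-aliasing checks accept it. The names here
(u32_alias_t, my_smp_store_release32) are illustrative, not the exact tools
header code, and the asm only builds with an aarch64 compiler:

#include <stdint.h>

typedef uint32_t __attribute__((__may_alias__)) u32_alias_t;

/* Store-release of a 32-bit value via the STLR instruction. */
#define my_smp_store_release32(p, v)				\
do {								\
	union { uint32_t __val; char __c[1]; } __u =		\
		{ .__val = (v) };				\
	asm volatile ("stlr %w1, %0"				\
		      : "=Q" (*(p))				\
		      : "r" (*(u32_alias_t *)__u.__c)		\
		      : "memory");				\
} while (0)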
[tip:locking/urgent] MAINTAINERS: Remove dead path from LOCKING PRIMITIVES entry
Commit-ID:  6d348925b306ab0cc9757a09a8cea6bf288018e4
Gitweb:     https://git.kernel.org/tip/6d348925b306ab0cc9757a09a8cea6bf288018e4
Author:     Will Deacon
AuthorDate: Mon, 1 Oct 2018 15:28:56 +0100
Committer:  Ingo Molnar
CommitDate: Tue, 2 Oct 2018 10:45:57 +0200

MAINTAINERS: Remove dead path from LOCKING PRIMITIVES entry

Since 890658b7ab48 ("locking/mutex: Kill arch specific code"), there are
no mutex header files under arch/, so we can remove the redundant entry
from MAINTAINERS.

Reported-by: Joe Perches
Signed-off-by: Will Deacon
Cc: Jason Low
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/20181001142856.gc9...@arm.com
Signed-off-by: Ingo Molnar
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index a255240d1452..3bd8913b2d78 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8598,7 +8598,6 @@ F:	include/linux/spinlock*.h
 F:	arch/*/include/asm/spinlock*.h
 F:	include/linux/rwlock*.h
 F:	include/linux/mutex*.h
-F:	arch/*/include/asm/mutex*.h
 F:	include/linux/rwsem*.h
 F:	arch/*/include/asm/rwsem.h
 F:	include/linux/seqlock.h
[tip:sched/urgent] rseq: Avoid infinite recursion when delivering SIGSEGV
Commit-ID:  784e0300fe9fe4aa81bd7df9d59e138f56bb605b
Gitweb:     https://git.kernel.org/tip/784e0300fe9fe4aa81bd7df9d59e138f56bb605b
Author:     Will Deacon
AuthorDate: Fri, 22 Jun 2018 11:45:07 +0100
Committer:  Thomas Gleixner
CommitDate: Fri, 22 Jun 2018 19:04:22 +0200

rseq: Avoid infinite recursion when delivering SIGSEGV

When delivering a signal to a task that is using rseq, we call into
__rseq_handle_notify_resume() so that the registers pushed in the
sigframe are updated to reflect the state of the restartable sequence
(for example, ensuring that the signal returns to the abort handler if
necessary).

However, if the rseq management fails due to an unrecoverable fault when
accessing userspace or certain combinations of RSEQ_CS_* flags, then we
will attempt to deliver a SIGSEGV. This has the potential for infinite
recursion if the rseq code continuously fails on signal delivery.

Avoid this problem by using force_sigsegv() instead of force_sig(),
which is explicitly designed to reset the SEGV handler to SIG_DFL in
the case of a recursive fault. In doing so, remove rseq_signal_deliver()
from the internal rseq API and have an optional struct ksignal *
parameter to rseq_handle_notify_resume() instead.

Signed-off-by: Will Deacon
Signed-off-by: Thomas Gleixner
Acked-by: Mathieu Desnoyers
Cc: pet...@infradead.org
Cc: paul...@linux.vnet.ibm.com
Cc: boqun.f...@gmail.com
Link: https://lkml.kernel.org/r/1529664307-983-1-git-send-email-will.dea...@arm.com

---
 arch/arm/kernel/signal.c     |  4 ++--
 arch/powerpc/kernel/signal.c |  4 ++--
 arch/x86/entry/common.c      |  2 +-
 arch/x86/kernel/signal.c     |  2 +-
 include/linux/sched.h        | 18 +++---
 kernel/rseq.c                |  7 ---
 6 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c
index f09e9d66d605..dec130e7078c 100644
--- a/arch/arm/kernel/signal.c
+++ b/arch/arm/kernel/signal.c
@@ -544,7 +544,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 	 * Increment event counter and perform fixup for the pre-signal
 	 * frame.
 	 */
-	rseq_signal_deliver(regs);
+	rseq_signal_deliver(ksig, regs);
 
 	/*
 	 * Set up the stack frame
@@ -666,7 +666,7 @@ do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall)
 		} else {
 			clear_thread_flag(TIF_NOTIFY_RESUME);
 			tracehook_notify_resume(regs);
-			rseq_handle_notify_resume(regs);
+			rseq_handle_notify_resume(NULL, regs);
 		}
 	}
 	local_irq_disable();
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index 17fe4339ba59..b3e8db376ecd 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -134,7 +134,7 @@ static void do_signal(struct task_struct *tsk)
 	/* Re-enable the breakpoints for the signal stack */
 	thread_change_pc(tsk, tsk->thread.regs);
 
-	rseq_signal_deliver(tsk->thread.regs);
+	rseq_signal_deliver(&ksig, tsk->thread.regs);
 
 	if (is32) {
 		if (ksig.ka.sa.sa_flags & SA_SIGINFO)
@@ -170,7 +170,7 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags)
 	if (thread_info_flags & _TIF_NOTIFY_RESUME) {
 		clear_thread_flag(TIF_NOTIFY_RESUME);
 		tracehook_notify_resume(regs);
-		rseq_handle_notify_resume(regs);
+		rseq_handle_notify_resume(NULL, regs);
 	}
 
 	user_enter();
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 92190879b228..3b2490b81918 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -164,7 +164,7 @@ static void exit_to_usermode_loop(struct pt_regs *regs, u32 cached_flags)
 		if (cached_flags & _TIF_NOTIFY_RESUME) {
 			clear_thread_flag(TIF_NOTIFY_RESUME);
 			tracehook_notify_resume(regs);
-			rseq_handle_notify_resume(regs);
+			rseq_handle_notify_resume(NULL, regs);
 		}
 
 		if (cached_flags & _TIF_USER_RETURN_NOTIFY)
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 445ca11ff863..92a3b312a53c 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -692,7 +692,7 @@ setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs)
 	 * Increment event counter and perform fixup for the pre-signal
 	 * frame.
 	 */
-	rseq_signal_deliver(regs);
+	rseq_signal_deliver(ksig, regs);
 
 	/* Set up the stack frame */
 	if (is_ia32_frame(ksig)) {
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c1882643d455..9256118bd40c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1799,20 +1799,22 @@
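The include/linux/sched.h hunk is truncated above. Based on the description,
the reworked inline helpers look roughly like the sketch below (not the
verbatim hunk). The TIF_NOTIFY_RESUME callers pass a NULL ksig; on the
kernel/rseq.c side (also truncated here), the error path picks ksig->sig when
available and falls back to 0 before calling force_sigsegv():

static inline void rseq_handle_notify_resume(struct ksignal *ksig,
					     struct pt_regs *regs)
{
	if (current->rseq)
		__rseq_handle_notify_resume(ksig, regs);
}

static inline void rseq_signal_deliver(struct ksignal *ksig,
				       struct pt_regs *regs)
{
	preempt_disable();
	__set_bit(RSEQ_EVENT_SIGNAL_BIT, &current->rseq_event_mask);
	preempt_enable();
	rseq_handle_notify_resume(ksig, regs);
}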
[tip:locking/core] locking/memory-barriers.txt: Fix broken DMA vs. MMIO ordering example
Commit-ID:  5846581e35637771952602eecc1e20ece5ced011
Gitweb:     https://git.kernel.org/tip/5846581e35637771952602eecc1e20ece5ced011
Author:     Will Deacon
AuthorDate: Mon, 14 May 2018 15:55:26 -0700
Committer:  Ingo Molnar
CommitDate: Tue, 15 May 2018 08:11:13 +0200

locking/memory-barriers.txt: Fix broken DMA vs. MMIO ordering example

The section of memory-barriers.txt that describes the dma_Xmb() barriers
has an incorrect example claiming that a wmb() is required after writing
to coherent memory in order for those writes to be visible to a device
before a subsequent MMIO access using writel() can reach the device.

In fact, this ordering guarantee is provided (at significant cost on some
architectures such as arm and power) by writel, so the wmb() is not
necessary. writel_relaxed exists for cases where this ordering is not
required.

Fix the example and update the text to make this clearer.

Reported-by: Sinan Kaya
Signed-off-by: Will Deacon
Signed-off-by: Paul E. McKenney
Cc: Andrew Morton
Cc: Arnd Bergmann
Cc: Benjamin Herrenschmidt
Cc: Jason Gunthorpe
Cc: Jonathan Corbet
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: aki...@gmail.com
Cc: boqun.f...@gmail.com
Cc: dhowe...@redhat.com
Cc: j.algl...@ucl.ac.uk
Cc: linux-a...@vger.kernel.org
Cc: luc.maran...@inria.fr
Cc: npig...@gmail.com
Cc: parri.and...@gmail.com
Cc: st...@rowland.harvard.edu
Link: http://lkml.kernel.org/r/1526338533-6044-1-git-send-email-paul...@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar
---
 Documentation/memory-barriers.txt | 17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 6dafc8085acc..34c1970908a5 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -1920,9 +1920,6 @@ There are some more advanced barrier functions:
 	/* assign ownership */
 	desc->status = DEVICE_OWN;
 
-	/* force memory to sync before notifying device via MMIO */
-	wmb();
-
 	/* notify device of new descriptors */
 	writel(DESC_NOTIFY, doorbell);
      }
 
@@ -1930,11 +1927,15 @@ There are some more advanced barrier functions:
      The dma_rmb() allows us guarantee the device has released ownership
      before we read the data from the descriptor, and the dma_wmb() allows
      us to guarantee the data is written to the descriptor before the device
-     can see it now has ownership.  The wmb() is needed to guarantee that the
-     cache coherent memory writes have completed before attempting a write to
-     the cache incoherent MMIO region.
-
-     See Documentation/DMA-API.txt for more information on consistent memory.
+     can see it now has ownership.  Note that, when using writel(), a prior
+     wmb() is not needed to guarantee that the cache coherent memory writes
+     have completed before writing to the MMIO region.  The cheaper
+     writel_relaxed() does not provide this guarantee and must not be used
+     here.
+
+     See the subsection "Kernel I/O barrier effects" for more information on
+     relaxed I/O accessors and the Documentation/DMA-API.txt file for more
+     information on consistent memory.
 
 MMIO WRITE BARRIER
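A condensed before/after of the producer side described above, using the
desc/doorbell names from the documentation's example (illustrative, not a
complete driver):

	/* Sufficient: writel() orders prior coherent-memory writes */
	desc->status = DEVICE_OWN;
	writel(DESC_NOTIFY, doorbell);

	/* Only the relaxed accessor needs an explicit barrier */
	desc->status = DEVICE_OWN;
	wmb();
	writel_relaxed(DESC_NOTIFY, doorbell);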
[tip:locking/core] locking/qspinlock: Remove duplicate clear_pending() function from PV code
Commit-ID:  3bea9adc96842b8a7345c7fb202c16ae9c8d5b25
Gitweb:     https://git.kernel.org/tip/3bea9adc96842b8a7345c7fb202c16ae9c8d5b25
Author:     Will Deacon
AuthorDate: Fri, 27 Apr 2018 10:40:13 +0100
Committer:  Ingo Molnar
CommitDate: Fri, 27 Apr 2018 12:55:22 +0200

locking/qspinlock: Remove duplicate clear_pending() function from PV code

The native clear_pending() function is identical to the PV version, so the
latter can simply be removed.

This fixes the build for systems with >= 16K CPUs using the PV lock
implementation.

Reported-by: Waiman Long
Signed-off-by: Will Deacon
Cc: Linus Torvalds
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: boqun.f...@gmail.com
Cc: linux-arm-ker...@lists.infradead.org
Cc: paul...@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/20180427101619.gb21...@arm.com
Signed-off-by: Ingo Molnar
---
 kernel/locking/qspinlock_paravirt.h | 5 -
 1 file changed, 5 deletions(-)

diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 25730b2ac022..5a0cf5f9008c 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -130,11 +130,6 @@ static __always_inline void set_pending(struct qspinlock *lock)
 	atomic_or(_Q_PENDING_VAL, &lock->val);
 }
 
-static __always_inline void clear_pending(struct qspinlock *lock)
-{
-	atomic_andnot(_Q_PENDING_VAL, &lock->val);
-}
-
 static __always_inline int trylock_clear_pending(struct qspinlock *lock)
 {
 	int val = atomic_read(&lock->val);
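For reference, clear_pending() now exists only in the native code, with one
variant per lock-word layout; a sketch of both (the _Q_PENDING_BITS == 8
variant appears verbatim in the pending-bit rework at the end of this
digest):

#if _Q_PENDING_BITS == 8
static __always_inline void clear_pending(struct qspinlock *lock)
{
	WRITE_ONCE(lock->pending, 0);	/* pending occupies its own byte */
}
#else
static __always_inline void clear_pending(struct qspinlock *lock)
{
	atomic_andnot(_Q_PENDING_VAL, &lock->val);	/* pending is a bitfield */
}
#endif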
[tip:locking/core] MAINTAINERS: Add myself as a co-maintainer for the locking subsystem
Commit-ID:  baa8c6ddf7be33f2b0ddeb68906d668caf646baa
Gitweb:     https://git.kernel.org/tip/baa8c6ddf7be33f2b0ddeb68906d668caf646baa
Author:     Will Deacon
AuthorDate: Thu, 26 Apr 2018 11:34:28 +0100
Committer:  Ingo Molnar
CommitDate: Fri, 27 Apr 2018 09:48:54 +0200

MAINTAINERS: Add myself as a co-maintainer for the locking subsystem

I've been heavily involved with concurrency and memory ordering stuff
(see ATOMIC INFRASTRUCTURE and LINUX KERNEL MEMORY CONSISTENCY MODEL)
and, with arm64 now using qrwlock with a view to using qspinlock in the
near future, I'm going to continue being involved with the core locking
primitives.

Reflect this by adding myself as a co-maintainer alongside Ingo and Peter.

Signed-off-by: Will Deacon
Acked-by: Peter Zijlstra (Intel)
Cc: Linus Torvalds
Cc: Thomas Gleixner
Cc: Waiman Long
Cc: boqun.f...@gmail.com
Cc: linux-arm-ker...@lists.infradead.org
Cc: paul...@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1524738868-31318-15-git-send-email-will.dea...@arm.com
Signed-off-by: Ingo Molnar
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index dd66ae9a847e..e4585e33862c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8328,6 +8328,7 @@ F:	Documentation/admin-guide/LSM/LoadPin.rst
 LOCKING PRIMITIVES
 M:	Peter Zijlstra
 M:	Ingo Molnar
+M:	Will Deacon
 L:	linux-kernel@vger.kernel.org
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git locking/core
 S:	Maintained
[tip:locking/core] locking/qspinlock: Use try_cmpxchg() instead of cmpxchg() when locking
Commit-ID:  ae75d9089ff7095d1d1a12c3cd86b21d3eaf3b15
Gitweb:     https://git.kernel.org/tip/ae75d9089ff7095d1d1a12c3cd86b21d3eaf3b15
Author:     Will Deacon
AuthorDate: Thu, 26 Apr 2018 11:34:26 +0100
Committer:  Ingo Molnar
CommitDate: Fri, 27 Apr 2018 09:48:52 +0200

locking/qspinlock: Use try_cmpxchg() instead of cmpxchg() when locking

When reaching the head of an uncontended queue on the qspinlock
slow-path, using a try_cmpxchg() instead of a cmpxchg() operation to
transition the lock word to _Q_LOCKED_VAL generates slightly better
code for x86 and pretty much identical code for arm64.

Reported-by: Peter Zijlstra
Signed-off-by: Will Deacon
Acked-by: Peter Zijlstra (Intel)
Acked-by: Waiman Long
Cc: Linus Torvalds
Cc: Thomas Gleixner
Cc: boqun.f...@gmail.com
Cc: linux-arm-ker...@lists.infradead.org
Cc: paul...@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1524738868-31318-13-git-send-email-will.dea...@arm.com
Signed-off-by: Ingo Molnar
---
 kernel/locking/qspinlock.c | 19 +--
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 956a12983bd0..46813185957b 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -467,16 +467,15 @@ locked:
 	 * Otherwise, we only need to grab the lock.
 	 */
 
-	/* In the PV case we might already have _Q_LOCKED_VAL set */
-	if ((val & _Q_TAIL_MASK) == tail) {
-		/*
-		 * The atomic_cond_read_acquire() call above has provided the
-		 * necessary acquire semantics required for locking.
-		 */
-		old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
-		if (old == val)
-			goto release; /* No contention */
-	}
+	/*
+	 * In the PV case we might already have _Q_LOCKED_VAL set.
+	 *
+	 * The atomic_cond_read_acquire() call above has provided the
+	 * necessary acquire semantics required for locking.
+	 */
+	if (((val & _Q_TAIL_MASK) == tail) &&
+	    atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL))
+		goto release; /* No contention */
 
 	/* Either somebody is queued behind us or _Q_PENDING_VAL is set */
 	set_locked(lock);
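The improvement comes from try_cmpxchg()'s calling convention: on failure it
writes the observed value back through the 'old' pointer and returns false,
which lets x86 branch directly on the flags set by the CMPXCHG instruction
instead of re-comparing. The generic fallback is roughly:

static __always_inline bool
atomic_try_cmpxchg_relaxed(atomic_t *v, int *old, int new)
{
	int r, o = *old;

	r = atomic_cmpxchg_relaxed(v, o, new);
	if (unlikely(r != o))
		*old = r;	/* tell the caller what we actually saw */
	return likely(r == o);
}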
[tip:locking/core] locking/qspinlock: Use smp_store_release() in queued_spin_unlock()
Commit-ID:  626e5fbc14358901ddaa90ce510e0fbeab310432
Gitweb:     https://git.kernel.org/tip/626e5fbc14358901ddaa90ce510e0fbeab310432
Author:     Will Deacon
AuthorDate: Thu, 26 Apr 2018 11:34:24 +0100
Committer:  Ingo Molnar
CommitDate: Fri, 27 Apr 2018 09:48:51 +0200

locking/qspinlock: Use smp_store_release() in queued_spin_unlock()

A qspinlock can be unlocked simply by writing zero to the locked byte.
This can be implemented in the generic code, so do that and remove the
arch-specific override for x86 in the !PV case.

Signed-off-by: Will Deacon
Acked-by: Peter Zijlstra (Intel)
Acked-by: Waiman Long
Cc: Linus Torvalds
Cc: Thomas Gleixner
Cc: boqun.f...@gmail.com
Cc: linux-arm-ker...@lists.infradead.org
Cc: paul...@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1524738868-31318-11-git-send-email-will.dea...@arm.com
Signed-off-by: Ingo Molnar
---
 arch/x86/include/asm/qspinlock.h | 17 ++---
 include/asm-generic/qspinlock.h  |  2 +-
 2 files changed, 7 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index da1370ad206d..3e70bed8a978 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -9,6 +9,12 @@
 
 #define _Q_PENDING_LOOPS	(1 << 9)
 
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __pv_init_lock_hash(void);
+extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
+
 #define	queued_spin_unlock queued_spin_unlock
 /**
  * queued_spin_unlock - release a queued spinlock
@@ -21,12 +27,6 @@ static inline void native_queued_spin_unlock(struct qspinlock *lock)
 	smp_store_release(&lock->locked, 0);
 }
 
-#ifdef CONFIG_PARAVIRT_SPINLOCKS
-extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
-extern void __pv_init_lock_hash(void);
-extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
-extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock);
-
 static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 {
 	pv_queued_spin_lock_slowpath(lock, val);
@@ -42,11 +42,6 @@ static inline bool vcpu_is_preempted(long cpu)
 {
 	return pv_vcpu_is_preempted(cpu);
 }
-#else
-static inline void queued_spin_unlock(struct qspinlock *lock)
-{
-	native_queued_spin_unlock(lock);
-}
 #endif
 
 #ifdef CONFIG_PARAVIRT
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index b37b4ad7eb94..a8ed0a352d75 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -100,7 +100,7 @@ static __always_inline void queued_spin_unlock(struct qspinlock *lock)
 	/*
 	 * unlock() needs release semantics:
 	 */
-	(void)atomic_sub_return_release(_Q_LOCKED_VAL, &lock->val);
+	smp_store_release(&lock->locked, 0);
 }
 #endif
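The byte store is only valid because the locked part of the word is an
individually addressable byte. A sketch of the little-endian layout assumed
here (following asm-generic/qspinlock_types.h, simplified; big-endian
reverses the field order):

typedef struct qspinlock {
	union {
		atomic_t val;		/* the whole 32-bit lock word   */
		struct {
			u8  locked;	/* 0 or _Q_LOCKED_VAL           */
			u8  pending;	/* pending byte (8-bit mode)    */
			u16 tail;	/* CPU number + MCS node index  */
		};
	};
} arch_spinlock_t;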
[tip:locking/core] locking/qspinlock: Elide back-to-back RELEASE operations with smp_wmb()
Commit-ID:  9d4646d14d51d62b967a12452c30ea7edf8dd8fa
Gitweb:     https://git.kernel.org/tip/9d4646d14d51d62b967a12452c30ea7edf8dd8fa
Author:     Will Deacon
AuthorDate: Thu, 26 Apr 2018 11:34:25 +0100
Committer:  Ingo Molnar
CommitDate: Fri, 27 Apr 2018 09:48:52 +0200

locking/qspinlock: Elide back-to-back RELEASE operations with smp_wmb()

The qspinlock slowpath must ensure that the MCS node is fully initialised
before it can be reached by another CPU. This is currently enforced by
using a RELEASE operation when updating the tail and also when linking
the node into the waitqueue, since the control dependency off xchg_tail
is insufficient to enforce sufficient ordering, see:

  95bcade33a8a ("locking/qspinlock: Ensure node is initialised before updating prev->next")

Back-to-back RELEASE operations may be expensive on some architectures,
particularly those that implement them using fences under the hood. We
can replace the two RELEASE operations with a single smp_wmb() fence and
use RELAXED operations for the subsequent publishing of the node.

Signed-off-by: Will Deacon
Acked-by: Peter Zijlstra (Intel)
Acked-by: Waiman Long
Cc: Linus Torvalds
Cc: Thomas Gleixner
Cc: boqun.f...@gmail.com
Cc: linux-arm-ker...@lists.infradead.org
Cc: paul...@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1524738868-31318-12-git-send-email-will.dea...@arm.com
Signed-off-by: Ingo Molnar
---
 kernel/locking/qspinlock.c | 33 +
 1 file changed, 17 insertions(+), 16 deletions(-)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index d6c3b029bd93..956a12983bd0 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -164,10 +164,10 @@ static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
 static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
 {
 	/*
-	 * Use release semantics to make sure that the MCS node is properly
-	 * initialized before changing the tail code.
+	 * We can use relaxed semantics since the caller ensures that the
+	 * MCS node is properly initialized before updating the tail.
 	 */
-	return (u32)xchg_release(&lock->tail,
+	return (u32)xchg_relaxed(&lock->tail,
 				 tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
 }
 
@@ -212,10 +212,11 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
 	for (;;) {
 		new = (val & _Q_LOCKED_PENDING_MASK) | tail;
 		/*
-		 * Use release semantics to make sure that the MCS node is
-		 * properly initialized before changing the tail code.
+		 * We can use relaxed semantics since the caller ensures that
+		 * the MCS node is properly initialized before updating the
+		 * tail.
 		 */
-		old = atomic_cmpxchg_release(&lock->val, val, new);
+		old = atomic_cmpxchg_relaxed(&lock->val, val, new);
 		if (old == val)
 			break;
 
@@ -388,12 +389,18 @@ queue:
 		goto release;
 
 	/*
+	 * Ensure that the initialisation of @node is complete before we
+	 * publish the updated tail via xchg_tail() and potentially link
+	 * @node into the waitqueue via WRITE_ONCE(prev->next, node) below.
+	 */
+	smp_wmb();
+
+	/*
+	 * Publish the updated tail.
 	 * We have already touched the queueing cacheline; don't bother with
 	 * pending stuff.
 	 *
 	 * p,*,* -> n,*,*
-	 *
-	 * RELEASE, such that the stores to @node must be complete.
 	 */
 	old = xchg_tail(lock, tail);
 	next = NULL;
@@ -405,14 +412,8 @@ queue:
 	if (old & _Q_TAIL_MASK) {
 		prev = decode_tail(old);
 
-		/*
-		 * We must ensure that the stores to @node are observed before
-		 * the write to prev->next.  The address dependency from
-		 * xchg_tail is not sufficient to ensure this because the read
-		 * component of xchg_tail is unordered with respect to the
-		 * initialisation of @node.
-		 */
-		smp_store_release(&prev->next, node);
+		/* Link @node into the waitqueue. */
+		WRITE_ONCE(prev->next, node);
 
 		pv_wait_node(node, prev);
 		arch_mcs_spin_lock_contended(&node->locked);
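Condensed, the publish sequence after this change is: initialise the node,
one write barrier, then both publish operations relaxed (a paraphrase of the
patch above, not additional code):

	node->locked = 0;
	node->next = NULL;
	/* ... remaining @node initialisation ... */

	smp_wmb();			/* order init before either publish */

	old = xchg_tail(lock, tail);	/* now relaxed internally */
	if (old & _Q_TAIL_MASK) {
		prev = decode_tail(old);
		WRITE_ONCE(prev->next, node);	/* link into the waitqueue */
	}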
[tip:locking/core] locking/qspinlock: Use smp_cond_load_relaxed() to wait for next node
Commit-ID:  c131a198c497db436b558ac5e9a140cdcb91b304
Gitweb:     https://git.kernel.org/tip/c131a198c497db436b558ac5e9a140cdcb91b304
Author:     Will Deacon
AuthorDate: Thu, 26 Apr 2018 11:34:23 +0100
Committer:  Ingo Molnar
CommitDate: Fri, 27 Apr 2018 09:48:50 +0200

locking/qspinlock: Use smp_cond_load_relaxed() to wait for next node

When a locker reaches the head of the queue and takes the lock, a
concurrent locker may enqueue and force the lock holder to spin whilst
its node->next field is initialised. Rather than open-code a
READ_ONCE/cpu_relax() loop, this can be implemented using
smp_cond_load_relaxed() instead.

Signed-off-by: Will Deacon
Acked-by: Peter Zijlstra (Intel)
Acked-by: Waiman Long
Cc: Linus Torvalds
Cc: Thomas Gleixner
Cc: boqun.f...@gmail.com
Cc: linux-arm-ker...@lists.infradead.org
Cc: paul...@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1524738868-31318-10-git-send-email-will.dea...@arm.com
Signed-off-by: Ingo Molnar
---
 kernel/locking/qspinlock.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 56af1fa9874d..d6c3b029bd93 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -483,10 +483,8 @@ locked:
 	/*
 	 * contended path; wait for next if not observed yet, release.
 	 */
-	if (!next) {
-		while (!(next = READ_ONCE(node->next)))
-			cpu_relax();
-	}
+	if (!next)
+		next = smp_cond_load_relaxed(&node->next, (VAL));
 
 	arch_mcs_spin_unlock_contended(&next->locked);
 	pv_kick_node(lock, next);
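smp_cond_load_relaxed() was introduced earlier in this series; its generic
fallback in asm-generic/barrier.h is essentially the open-coded loop being
replaced, with VAL naming the most recently loaded value inside the
condition expression:

#define smp_cond_load_relaxed(ptr, cond_expr) ({	\
	typeof(ptr) __PTR = (ptr);			\
	typeof(*ptr) VAL;				\
	for (;;) {					\
		VAL = READ_ONCE(*__PTR);		\
		if (cond_expr)				\
			break;				\
		cpu_relax();				\
	}						\
	VAL;						\
})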
[tip:locking/core] locking/qspinlock: Use atomic_cond_read_acquire()
Commit-ID:  f9c811fac48cfbbfb452b08d1042386947868d07
Gitweb:     https://git.kernel.org/tip/f9c811fac48cfbbfb452b08d1042386947868d07
Author:     Will Deacon
AuthorDate: Thu, 26 Apr 2018 11:34:21 +0100
Committer:  Ingo Molnar
CommitDate: Fri, 27 Apr 2018 09:48:49 +0200

locking/qspinlock: Use atomic_cond_read_acquire()

Rather than dig into the counter field of the atomic_t inside the
qspinlock structure so that we can call smp_cond_load_acquire(), use
atomic_cond_read_acquire() instead, which operates on the atomic_t
directly.

Signed-off-by: Will Deacon
Acked-by: Peter Zijlstra (Intel)
Acked-by: Waiman Long
Cc: Linus Torvalds
Cc: Thomas Gleixner
Cc: boqun.f...@gmail.com
Cc: linux-arm-ker...@lists.infradead.org
Cc: paul...@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1524738868-31318-8-git-send-email-will.dea...@arm.com
Signed-off-by: Ingo Molnar
---
 kernel/locking/qspinlock.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index b51494a50b1e..56af1fa9874d 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -337,8 +337,8 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	 * barriers.
 	 */
 	if (val & _Q_LOCKED_MASK) {
-		smp_cond_load_acquire(&lock->val.counter,
-				      !(VAL & _Q_LOCKED_MASK));
+		atomic_cond_read_acquire(&lock->val,
+					 !(VAL & _Q_LOCKED_MASK));
 	}
 
 	/*
@@ -441,8 +441,8 @@ queue:
 	 *
 	 * The PV pv_wait_head_or_lock function, if active, will acquire
 	 * the lock and return a non-zero value. So we have to skip the
-	 * smp_cond_load_acquire() call. As the next PV queue head hasn't been
-	 * designated yet, there is no way for the locked value to become
+	 * atomic_cond_read_acquire() call. As the next PV queue head hasn't
+	 * been designated yet, there is no way for the locked value to become
 	 * _Q_SLOW_VAL. So both the set_locked() and the
 	 * atomic_cmpxchg_relaxed() calls will be safe.
 	 *
@@ -452,7 +452,7 @@ queue:
 	if ((val = pv_wait_head_or_lock(lock, node)))
 		goto locked;
 
-	val = smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_PENDING_MASK));
+	val = atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK));
 
 locked:
 	/*
@@ -469,7 +469,7 @@ locked:
 	/* In the PV case we might already have _Q_LOCKED_VAL set */
 	if ((val & _Q_TAIL_MASK) == tail) {
 		/*
-		 * The smp_cond_load_acquire() call above has provided the
+		 * The atomic_cond_read_acquire() call above has provided the
 		 * necessary acquire semantics required for locking.
 		 */
 		old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
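atomic_cond_read_acquire() is a thin wrapper rather than a new primitive, so
the generated code is unchanged; its definition in <linux/atomic.h> is
simply:

#define atomic_cond_read_acquire(v, c)	smp_cond_load_acquire(&(v)->counter, (c))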
[tip:locking/core] locking/qspinlock: Kill cmpxchg() loop when claiming lock from head of queue
Commit-ID:  c61da58d8a9ba9238250a548f00826eaf44af0f7
Gitweb:     https://git.kernel.org/tip/c61da58d8a9ba9238250a548f00826eaf44af0f7
Author:     Will Deacon
AuthorDate: Thu, 26 Apr 2018 11:34:20 +0100
Committer:  Ingo Molnar
CommitDate: Fri, 27 Apr 2018 09:48:48 +0200

locking/qspinlock: Kill cmpxchg() loop when claiming lock from head of queue

When a queued locker reaches the head of the queue, it claims the lock
by setting _Q_LOCKED_VAL in the lockword. If there isn't contention, it
must also clear the tail as part of this operation so that subsequent
lockers can avoid taking the slowpath altogether.

Currently this is expressed as a cmpxchg() loop that practically only
runs up to two iterations. This is confusing to the reader and unhelpful
to the compiler. Rewrite the cmpxchg() loop without the loop, so that a
failed cmpxchg() implies that there is contention and we just need to
write to _Q_LOCKED_VAL without considering the rest of the lockword.

Signed-off-by: Will Deacon
Acked-by: Peter Zijlstra (Intel)
Acked-by: Waiman Long
Cc: Linus Torvalds
Cc: Thomas Gleixner
Cc: boqun.f...@gmail.com
Cc: linux-arm-ker...@lists.infradead.org
Cc: paul...@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1524738868-31318-7-git-send-email-will.dea...@arm.com
Signed-off-by: Ingo Molnar
---
 kernel/locking/qspinlock.c | 19 ---
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index e06f67e021d9..b51494a50b1e 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -465,24 +465,21 @@ locked:
 	 * and nobody is pending, clear the tail code and grab the lock.
 	 * Otherwise, we only need to grab the lock.
 	 */
-	for (;;) {
-		/* In the PV case we might already have _Q_LOCKED_VAL set */
-		if ((val & _Q_TAIL_MASK) != tail || (val & _Q_PENDING_MASK)) {
-			set_locked(lock);
-			break;
-		}
+
+	/* In the PV case we might already have _Q_LOCKED_VAL set */
+	if ((val & _Q_TAIL_MASK) == tail) {
 		/*
 		 * The smp_cond_load_acquire() call above has provided the
-		 * necessary acquire semantics required for locking. At most
-		 * two iterations of this loop may be ran.
+		 * necessary acquire semantics required for locking.
 		 */
 		old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
 		if (old == val)
-			goto release;	/* No contention */
-
-		val = old;
+			goto release; /* No contention */
 	}
 
+	/* Either somebody is queued behind us or _Q_PENDING_VAL is set */
+	set_locked(lock);
+
 	/*
 	 * contended path; wait for next if not observed yet, release.
 	 */
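For readers decoding comments such as "p,*,* -> n,*,*" in these patches:
qspinlock.c annotates lock-word transitions as (queue tail, pending bit,
lock value) triples, for example:

/*
 * (queue tail, pending bit, lock value)
 *
 * (0,0,0) : unlocked
 * (0,0,1) : locked, uncontended
 * (n,0,0) : a queue exists but the lock is free; the head claims it with
 *           the single (n,0,0) -> (0,0,1) cmpxchg() rewritten above
 * (*,1,*) : a pending locker is spinning without queuing
 */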
[tip:locking/core] locking/qspinlock: Kill cmpxchg() loop when claiming lock from head of queue
Commit-ID: c61da58d8a9ba9238250a548f00826eaf44af0f7 Gitweb: https://git.kernel.org/tip/c61da58d8a9ba9238250a548f00826eaf44af0f7 Author: Will Deacon AuthorDate: Thu, 26 Apr 2018 11:34:20 +0100 Committer: Ingo Molnar CommitDate: Fri, 27 Apr 2018 09:48:48 +0200 locking/qspinlock: Kill cmpxchg() loop when claiming lock from head of queue When a queued locker reaches the head of the queue, it claims the lock by setting _Q_LOCKED_VAL in the lockword. If there isn't contention, it must also clear the tail as part of this operation so that subsequent lockers can avoid taking the slowpath altogether. Currently this is expressed as a cmpxchg() loop that practically only runs up to two iterations. This is confusing to the reader and unhelpful to the compiler. Rewrite the cmpxchg() loop without the loop, so that a failed cmpxchg() implies that there is contention and we just need to write to _Q_LOCKED_VAL without considering the rest of the lockword. Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Cc: Linus Torvalds Cc: Thomas Gleixner Cc: boqun.f...@gmail.com Cc: linux-arm-ker...@lists.infradead.org Cc: paul...@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1524738868-31318-7-git-send-email-will.dea...@arm.com Signed-off-by: Ingo Molnar --- kernel/locking/qspinlock.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index e06f67e021d9..b51494a50b1e 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -465,24 +465,21 @@ locked: * and nobody is pending, clear the tail code and grab the lock. * Otherwise, we only need to grab the lock. */ - for (;;) { - /* In the PV case we might already have _Q_LOCKED_VAL set */ - if ((val & _Q_TAIL_MASK) != tail || (val & _Q_PENDING_MASK)) { - set_locked(lock); - break; - } + + /* In the PV case we might already have _Q_LOCKED_VAL set */ + if ((val & _Q_TAIL_MASK) == tail) { /* * The smp_cond_load_acquire() call above has provided the -* necessary acquire semantics required for locking. At most -* two iterations of this loop may be ran. +* necessary acquire semantics required for locking. */ old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL); if (old == val) - goto release; /* No contention */ - - val = old; + goto release; /* No contention */ } + /* Either somebody is queued behind us or _Q_PENDING_VAL is set */ + set_locked(lock); + /* * contended path; wait for next if not observed yet, release. */
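Read on its own, the restructured claim path looks like the sketch below. This is an illustrative paraphrase of the hunk above (the function wrapper is invented for exposition), making explicit why a single relaxed cmpxchg() is enough once the tail comparison is hoisted out of the loop:

/* Illustrative paraphrase of the new head-of-queue claim logic. */
static void claim_lock_at_head(struct qspinlock *lock, u32 val, u32 tail)
{
	/* In the PV case we might already have _Q_LOCKED_VAL set */
	if ((val & _Q_TAIL_MASK) == tail) {
		/* Nobody queued behind us: clear the tail and lock in one go. */
		if (atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL) == val)
			return;	/* No contention */
	}

	/* A failed cmpxchg() means somebody is queued or pending. */
	set_locked(lock);
}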
[tip:locking/core] locking/qspinlock: Remove unbounded cmpxchg() loop from locking slowpath
Commit-ID: 59fb586b4a07b4e1a0ee577140ab4842ba451acd Gitweb: https://git.kernel.org/tip/59fb586b4a07b4e1a0ee577140ab4842ba451acd Author: Will Deacon AuthorDate: Thu, 26 Apr 2018 11:34:19 +0100 Committer: Ingo Molnar CommitDate: Fri, 27 Apr 2018 09:48:47 +0200 locking/qspinlock: Remove unbounded cmpxchg() loop from locking slowpath The qspinlock locking slowpath utilises a "pending" bit as a simple form of an embedded test-and-set lock that can avoid the overhead of explicit queuing in cases where the lock is held but uncontended. This bit is managed using a cmpxchg() loop which tries to transition the uncontended lock word from (0,0,0) -> (0,0,1) or (0,0,1) -> (0,1,1). Unfortunately, the cmpxchg() loop is unbounded and lockers can be starved indefinitely if the lock word is seen to oscillate between unlocked (0,0,0) and locked (0,0,1). This could happen if concurrent lockers are able to take the lock in the cmpxchg() loop without queuing and pass it around amongst themselves. This patch fixes the problem by unconditionally setting _Q_PENDING_VAL using atomic_fetch_or, and then inspecting the old value to see whether we need to spin on the current lock owner, or whether we now effectively hold the lock. The tricky scenario is when concurrent lockers end up queuing on the lock and the lock becomes available, causing us to see a lockword of (n,0,0). With pending now set, simply queuing could lead to deadlock as the head of the queue may not have observed the pending flag being cleared. Conversely, if the head of the queue did observe pending being cleared, then it could transition the lock from (n,0,0) -> (0,0,1) meaning that any attempt to "undo" our setting of the pending bit could race with a concurrent locker trying to set it. We handle this race by preserving the pending bit when taking the lock after reaching the head of the queue and leaving the tail entry intact if we saw pending set, because we know that the tail is going to be updated shortly. Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Cc: Linus Torvalds Cc: Thomas Gleixner Cc: boqun.f...@gmail.com Cc: linux-arm-ker...@lists.infradead.org Cc: paul...@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1524738868-31318-6-git-send-email-will.dea...@arm.com Signed-off-by: Ingo Molnar --- kernel/locking/qspinlock.c | 102 kernel/locking/qspinlock_paravirt.h | 5 ----- 2 files changed, 58 insertions(+), 49 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index a0f7976348f8..e06f67e021d9 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -127,6 +127,17 @@ static inline __pure struct mcs_spinlock *decode_tail(u32 tail) #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK) #if _Q_PENDING_BITS == 8 +/** + * clear_pending - clear the pending bit. + * @lock: Pointer to queued spinlock structure + * + * *,1,* -> *,0,* + */ +static __always_inline void clear_pending(struct qspinlock *lock) +{ + WRITE_ONCE(lock->pending, 0); +} + /** * clear_pending_set_locked - take ownership and clear the pending bit. * @lock: Pointer to queued spinlock structure @@ -162,6 +173,17 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) #else /* _Q_PENDING_BITS == 8 */ +/** + * clear_pending - clear the pending bit. 
+ * @lock: Pointer to queued spinlock structure + * + * *,1,* -> *,0,* + */ +static __always_inline void clear_pending(struct qspinlock *lock) +{ + atomic_andnot(_Q_PENDING_VAL, &lock->val); +} + /** * clear_pending_set_locked - take ownership and clear the pending bit. * @lock: Pointer to queued spinlock structure @@ -266,7 +288,7 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock, void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) { struct mcs_spinlock *prev, *next, *node; - u32 new, old, tail; + u32 old, tail; int idx; BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS)); @@ -289,59 +311,51 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) (VAL != _Q_PENDING_VAL) || !cnt--); } + /* +* If we observe any contention; queue. +*/ + if (val & ~_Q_LOCKED_MASK) + goto queue; + /* * trylock || pending * * 0,0,0 -> 0,0,1 ; trylock * 0,0,1 -> 0,1,1 ; pending */ - for (;;) { + val = atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val); + if (!(val & ~_Q_LOCKED_MASK)) { /* -* If we observe any contention; queue. +* We're pending, wait for the owner to go away. +* +* *,1,1 -> *,1,0 +* +* this wait loop must be a load-acquire such that we match the +* store-release that
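Although the diff above is truncated, the shape of the new fast path can be sketched as follows. This is a simplified paraphrase for exposition only (the pending-undo and PV details are elided; see the full patch), showing how one unconditional atomic_fetch_or_acquire() replaces the unbounded cmpxchg() loop:

/* Illustrative, simplified: claim via pending or fall back to queuing. */
static void pending_fastpath(struct qspinlock *lock)
{
	u32 val = atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val);

	if (val & ~_Q_LOCKED_MASK)
		return;	/* contended: queue (possibly undoing our pending bit) */

	/* We own the pending bit; wait for the owner, then take over. */
	if (val & _Q_LOCKED_MASK)
		atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_MASK));

	clear_pending_set_locked(lock);	/* 0,1,0 -> 0,0,1 */
}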
[tip:locking/core] locking/qspinlock/x86: Increase _Q_PENDING_LOOPS upper bound
Commit-ID: b247be3fe89b6aba928bf80f4453d1c4ba8d2063 Gitweb: https://git.kernel.org/tip/b247be3fe89b6aba928bf80f4453d1c4ba8d2063 Author: Will Deacon AuthorDate: Thu, 26 Apr 2018 11:34:18 +0100 Committer: Ingo Molnar CommitDate: Fri, 27 Apr 2018 09:48:47 +0200 locking/qspinlock/x86: Increase _Q_PENDING_LOOPS upper bound On x86, atomic_cond_read_relaxed will busy-wait with a cpu_relax() loop, so it is desirable to increase the number of times we spin on the qspinlock lockword when it is found to be transitioning from pending to locked. According to Waiman Long: | Ideally, the spinning times should be at least a few times the typical | cacheline load time from memory which I think can be down to 100ns or | so for each cacheline load with the newest systems or up to several | hundreds ns for older systems. which in his benchmarking corresponded to 512 iterations. Suggested-by: Waiman Long Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Cc: Linus Torvalds Cc: Thomas Gleixner Cc: boqun.f...@gmail.com Cc: linux-arm-ker...@lists.infradead.org Cc: paul...@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1524738868-31318-5-git-send-email-will.dea...@arm.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/qspinlock.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 90b0b0ed8161..da1370ad206d 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -7,6 +7,8 @@ #include <asm-generic/qspinlock_types.h> #include <asm/paravirt.h> +#define _Q_PENDING_LOOPS (1 << 9) + #define queued_spin_unlock queued_spin_unlock /** * queued_spin_unlock - release a queued spinlock
[tip:locking/core] locking/qspinlock: Bound spinning on pending->locked transition in slowpath
Commit-ID: 6512276d97b160d90b53285bd06f7f201459a7e3 Gitweb: https://git.kernel.org/tip/6512276d97b160d90b53285bd06f7f201459a7e3 Author: Will Deacon AuthorDate: Thu, 26 Apr 2018 11:34:17 +0100 Committer: Ingo Molnar CommitDate: Fri, 27 Apr 2018 09:48:46 +0200 locking/qspinlock: Bound spinning on pending->locked transition in slowpath If a locker taking the qspinlock slowpath reads a lock value indicating that only the pending bit is set, then it will spin whilst the concurrent pending->locked transition takes effect. Unfortunately, there is no guarantee that such a transition will ever be observed since concurrent lockers could continuously set pending and hand over the lock amongst themselves, leading to starvation. Whilst this would probably resolve in practice, it means that it is not possible to prove liveness properties about the lock and means that lock acquisition time is unbounded. Rather than removing the pending->locked spinning from the slowpath altogether (which has been shown to heavily penalise a 2-threaded locking stress test on x86), this patch replaces the explicit spinning with a call to atomic_cond_read_relaxed and allows the architecture to provide a bound on the number of spins. For architectures that can respond to changes in cacheline state in their smp_cond_load implementation, it should be sufficient to use the default bound of 1. Suggested-by: Waiman Long Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Cc: Linus Torvalds Cc: Thomas Gleixner Cc: boqun.f...@gmail.com Cc: linux-arm-ker...@lists.infradead.org Cc: paul...@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1524738868-31318-4-git-send-email-will.dea...@arm.com Signed-off-by: Ingo Molnar --- kernel/locking/qspinlock.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index f5b0e59f6d14..a0f7976348f8 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -76,6 +76,18 @@ #define MAX_NODES 4 #endif +/* + * The pending bit spinning loop count. + * This heuristic is used to limit the number of lockword accesses + * made by atomic_cond_read_relaxed when waiting for the lock to + * transition out of the "== _Q_PENDING_VAL" state. We don't spin + * indefinitely because there's no guarantee that we'll make forward + * progress. + */ +#ifndef _Q_PENDING_LOOPS +#define _Q_PENDING_LOOPS 1 +#endif + /* * Per-CPU queue node structures; we can never have more than 4 nested * contexts: task, softirq, hardirq, nmi. @@ -266,13 +278,15 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) return; /* -* wait for in-progress pending->locked hand-overs +* Wait for in-progress pending->locked hand-overs with a bounded +* number of spins so that we guarantee forward progress. * * 0,1,0 -> 0,0,1 */ if (val == _Q_PENDING_VAL) { - while ((val = atomic_read(&lock->val)) == _Q_PENDING_VAL) - cpu_relax(); + int cnt = _Q_PENDING_LOOPS; + val = atomic_cond_read_relaxed(&lock->val, + (VAL != _Q_PENDING_VAL) || !cnt--); } /*
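Concretely, the bounded wait expands to something like the loop below. This is an illustrative expansion of the generic smp_cond_load_relaxed() path, not literal kernel output: the condition expression is re-evaluated on every iteration, so the !cnt-- term turns the otherwise unbounded spin into at most _Q_PENDING_LOOPS lockword reads:

/* Illustrative expansion of the bounded pending->locked wait. */
int cnt = _Q_PENDING_LOOPS;
u32 val;

for (;;) {
	val = atomic_read(&lock->val);
	if ((val != _Q_PENDING_VAL) || !cnt--)
		break;		/* handover seen, or spin budget exhausted */
	cpu_relax();
}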
[tip:locking/core] locking/qspinlock: Merge 'struct __qspinlock' into 'struct qspinlock'
Commit-ID: 625e88be1f41b53cec55827c984e4a89ea8ee9f9 Gitweb: https://git.kernel.org/tip/625e88be1f41b53cec55827c984e4a89ea8ee9f9 Author: Will Deacon AuthorDate: Thu, 26 Apr 2018 11:34:16 +0100 Committer: Ingo Molnar CommitDate: Fri, 27 Apr 2018 09:48:45 +0200 locking/qspinlock: Merge 'struct __qspinlock' into 'struct qspinlock' 'struct __qspinlock' provides a handy union of fields so that subcomponents of the lockword can be accessed by name, without having to manage shifts and masks explicitly and take endianness into account. This is useful in qspinlock.h and also potentially in arch headers, so move the 'struct __qspinlock' into 'struct qspinlock' and kill the extra definition. Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Acked-by: Boqun Feng Cc: Linus Torvalds Cc: Thomas Gleixner Cc: linux-arm-ker...@lists.infradead.org Cc: paul...@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1524738868-31318-3-git-send-email-will.dea...@arm.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/qspinlock.h | 2 +- arch/x86/include/asm/qspinlock_paravirt.h | 3 +- include/asm-generic/qspinlock_types.h | 32 +++-- kernel/locking/qspinlock.c | 46 ++- kernel/locking/qspinlock_paravirt.h | 34 --- 5 files changed, 46 insertions(+), 71 deletions(-) diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 5e16b5d40d32..90b0b0ed8161 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -16,7 +16,7 @@ */ static inline void native_queued_spin_unlock(struct qspinlock *lock) { - smp_store_release((u8 *)lock, 0); + smp_store_release(&lock->locked, 0); } #ifdef CONFIG_PARAVIRT_SPINLOCKS diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index 923307ea11c7..9ef5ee03d2d7 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -22,8 +22,7 @@ PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath); * * void __pv_queued_spin_unlock(struct qspinlock *lock) * { - * struct __qspinlock *l = (void *)lock; - * u8 lockval = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0); + * u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0); * * if (likely(lockval == _Q_LOCKED_VAL)) * return; diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h index 034acd0c4956..0763f065b975 100644 --- a/include/asm-generic/qspinlock_types.h +++ b/include/asm-generic/qspinlock_types.h @@ -29,13 +29,41 @@ #endif typedef struct qspinlock { - atomic_t val; + union { + atomic_t val; + + /* +* By using the whole 2nd least significant byte for the +* pending bit, we can allow better optimization of the lock +* acquisition for the pending bit holder. 
+*/ +#ifdef __LITTLE_ENDIAN + struct { + u8 locked; + u8 pending; + }; + struct { + u16 locked_pending; + u16 tail; + }; +#else + struct { + u16 tail; + u16 locked_pending; + }; + struct { + u8 reserved[2]; + u8 pending; + u8 locked; + }; +#endif + }; } arch_spinlock_t; /* * Initializier */ -#define __ARCH_SPIN_LOCK_UNLOCKED { ATOMIC_INIT(0) } +#define __ARCH_SPIN_LOCK_UNLOCKED { .val = ATOMIC_INIT(0) } /* * Bitfields in the atomic value: diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index d880296245c5..f5b0e59f6d14 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c @@ -114,40 +114,6 @@ static inline __pure struct mcs_spinlock *decode_tail(u32 tail) #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK) -/* - * By using the whole 2nd least significant byte for the pending bit, we - * can allow better optimization of the lock acquisition for the pending - * bit holder. - * - * This internal structure is also used by the set_locked function which - * is not restricted to _Q_PENDING_BITS == 8. - */ -struct __qspinlock { - union { - atomic_t val; -#ifdef __LITTLE_ENDIAN - struct { - u8 locked; - u8 pending; - }; - struct { - u16 locked_pending; - u16 tail; - }; -#else - struct { - u16 tail; - u16 locked_pending; -
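With the union in place, helpers that previously had to cast through 'struct __qspinlock' can use the named fields directly. For instance, set_locked() in the part of the diff truncated above reduces to the form below (reconstructed from the upstream commit for illustration):

/* After the merge: no casting needed to poke the locked byte. */
static __always_inline void set_locked(struct qspinlock *lock)
{
	WRITE_ONCE(lock->locked, _Q_LOCKED_VAL);
}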
[tip:locking/core] locking/barriers: Introduce smp_cond_load_relaxed() and atomic_cond_read_relaxed()
Commit-ID: fcfdfe30e324725007e9dc5814b62a4c430ea909 Gitweb: https://git.kernel.org/tip/fcfdfe30e324725007e9dc5814b62a4c430ea909 Author: Will Deacon AuthorDate: Thu, 26 Apr 2018 11:34:15 +0100 Committer: Ingo Molnar CommitDate: Fri, 27 Apr 2018 09:48:44 +0200 locking/barriers: Introduce smp_cond_load_relaxed() and atomic_cond_read_relaxed() Whilst we currently provide smp_cond_load_acquire() and atomic_cond_read_acquire(), there are cases where the ACQUIRE semantics are not required because of a subsequent fence or release operation once the conditional loop has exited. This patch adds relaxed versions of the conditional spinning primitives to avoid unnecessary barrier overhead on architectures such as arm64. Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Acked-by: Waiman Long Cc: Linus Torvalds Cc: Thomas Gleixner Cc: boqun.f...@gmail.com Cc: linux-arm-ker...@lists.infradead.org Cc: paul...@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1524738868-31318-2-git-send-email-will.dea...@arm.com Signed-off-by: Ingo Molnar --- include/asm-generic/atomic-long.h | 2 ++ include/asm-generic/barrier.h | 27 +++++++++++++++++++++------ include/linux/atomic.h | 2 ++ 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h index 34a028a7bcc5..5b2b0b5ea06d 100644 --- a/include/asm-generic/atomic-long.h +++ b/include/asm-generic/atomic-long.h @@ -244,6 +244,8 @@ static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u) #define atomic_long_inc_not_zero(l) \ ATOMIC_LONG_PFX(_inc_not_zero)((ATOMIC_LONG_PFX(_t) *)(l)) +#define atomic_long_cond_read_relaxed(v, c) \ + ATOMIC_LONG_PFX(_cond_read_relaxed)((ATOMIC_LONG_PFX(_t) *)(v), (c)) #define atomic_long_cond_read_acquire(v, c) \ ATOMIC_LONG_PFX(_cond_read_acquire)((ATOMIC_LONG_PFX(_t) *)(v), (c)) diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h index 29458bbb2fa0..2cafdbb9ae4c 100644 --- a/include/asm-generic/barrier.h +++ b/include/asm-generic/barrier.h @@ -221,18 +221,17 @@ do { \ #endif /** - * smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering + * smp_cond_load_relaxed() - (Spin) wait for cond with no ordering guarantees * @ptr: pointer to the variable to wait on * @cond: boolean expression to wait for * - * Equivalent to using smp_load_acquire() on the condition variable but employs - * the control dependency of the wait to reduce the barrier on many platforms. + * Equivalent to using READ_ONCE() on the condition variable. * * Due to C lacking lambda expressions we load the value of *ptr into a * pre-named variable @VAL to be used in @cond. */ -#ifndef smp_cond_load_acquire -#define smp_cond_load_acquire(ptr, cond_expr) ({ \ +#ifndef smp_cond_load_relaxed +#define smp_cond_load_relaxed(ptr, cond_expr) ({ \ typeof(ptr) __PTR = (ptr); \ typeof(*ptr) VAL; \ for (;;) { \ @@ -241,10 +240,26 @@ do { \ break; \ cpu_relax();\ } \ - smp_acquire__after_ctrl_dep(); \ VAL;\ }) #endif +/** + * smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering + * @ptr: pointer to the variable to wait on + * @cond: boolean expression to wait for + * + * Equivalent to using smp_load_acquire() on the condition variable but employs + * the control dependency of the wait to reduce the barrier on many platforms. 
+ */ +#ifndef smp_cond_load_acquire +#define smp_cond_load_acquire(ptr, cond_expr) ({ \ + typeof(*ptr) _val; \ + _val = smp_cond_load_relaxed(ptr, cond_expr); \ + smp_acquire__after_ctrl_dep(); \ + _val; \ +}) +#endif + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_GENERIC_BARRIER_H */ diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 8b276fd9a127..01ce3997cb42 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -654,6 +654,7 @@ static inline int atomic_dec_if_positive(atomic_t *v) } #endif +#define atomic_cond_read_relaxed(v, c) smp_cond_load_relaxed(&(v)->counter, (c)) #define atomic_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c)) #ifdef CONFIG_GENERIC_ATOMIC64 @@ -1075,6 +1076,7 @@ static inline long long atomic64_fetch_andnot_release(long long i, atomic64_t
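As the hunk shows, the ACQUIRE variant is now just the relaxed loop followed by smp_acquire__after_ctrl_dep(). The same layering is available to callers: below is a sketch of a hypothetical consumer (the struct and function names are invented) that spins cheaply and pays for the acquire ordering only once, after the wait:

/* Illustrative: spin with no ordering, upgrade to ACQUIRE on demand. */
struct msg {
	unsigned long seq;
	int payload;
};

static int read_payload(struct msg *m, unsigned long want)
{
	/* No barrier is paid on each iteration of the wait ... */
	smp_cond_load_relaxed(&m->seq, VAL >= want);

	/* ... ordering is bought once, before the payload is read. */
	smp_acquire__after_ctrl_dep();
	return READ_ONCE(m->payload);
}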
[tip:x86/pti] nospec: Move array_index_nospec() parameter checking into separate macro
Commit-ID: 8fa80c503b484ddc1abbd10c7cb2ab81f3824a50 Gitweb: https://git.kernel.org/tip/8fa80c503b484ddc1abbd10c7cb2ab81f3824a50 Author: Will Deacon AuthorDate: Mon, 5 Feb 2018 14:16:06 +0000 Committer: Ingo Molnar CommitDate: Thu, 15 Feb 2018 01:15:51 +0100 nospec: Move array_index_nospec() parameter checking into separate macro For architectures providing their own implementation of array_index_mask_nospec() in asm/barrier.h, attempting to use WARN_ONCE() to complain about out-of-range parameters using WARN_ON() results in a mess of mutually-dependent include files. Rather than unpick the dependencies, simply have the core code in nospec.h perform the checking for us. Signed-off-by: Will Deacon Acked-by: Thomas Gleixner Cc: Dan Williams Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1517840166-15399-1-git-send-email-will.dea...@arm.com Signed-off-by: Ingo Molnar --- include/linux/nospec.h | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/include/linux/nospec.h b/include/linux/nospec.h index b99bced..fbc98e2 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -20,20 +20,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, unsigned long size) { /* -* Warn developers about inappropriate array_index_nospec() usage. -* -* Even if the CPU speculates past the WARN_ONCE branch, the -* sign bit of @index is taken into account when generating the -* mask. -* -* This warning is compiled out when the compiler can infer that -* @index and @size are less than LONG_MAX. -*/ - if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, - "array_index_nospec() limited to range of [0, LONG_MAX]\n")) - return 0; - - /* * Always calculate and emit the mask even if the compiler * thinks the mask is not needed. The compiler does not take * into account the value of @index under speculation. @@ -44,6 +30,26 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, #endif /* + * Warn developers about inappropriate array_index_nospec() usage. + * + * Even if the CPU speculates past the WARN_ONCE branch, the + * sign bit of @index is taken into account when generating the + * mask. + * + * This warning is compiled out when the compiler can infer that + * @index and @size are less than LONG_MAX. + */ +#define array_index_mask_nospec_check(index, size) \ +({ \ + if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, \ + "array_index_nospec() limited to range of [0, LONG_MAX]\n")) \ + _mask = 0; \ + else \ + _mask = array_index_mask_nospec(index, size); \ + _mask; \ +}) + +/* * array_index_nospec - sanitize an array index after a bounds check * * For a code sequence like: * @@ -61,7 +67,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, ({ \ typeof(index) _i = (index); \ typeof(size) _s = (size); \ - unsigned long _mask = array_index_mask_nospec(_i, _s); \ + unsigned long _mask = array_index_mask_nospec_check(_i, _s);\ \ BUILD_BUG_ON(sizeof(_i) > sizeof(long));\ BUILD_BUG_ON(sizeof(_s) > sizeof(long));\
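As a usage reminder (a sketch, not part of the patch; the function is invented for illustration), the caller-facing macro slots into a bounds-checked access like this:

/* Illustrative bounds-checked read using array_index_nospec(). */
static int load_entry(const int *array, unsigned long index,
		      unsigned long size)
{
	if (index >= size)
		return -EINVAL;

	/* Clamp @index under speculation so no out-of-bounds load occurs. */
	index = array_index_nospec(index, size);
	return array[index];
}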
[tip:locking/urgent] locking/atomic/bitops: Document and clarify ordering semantics for failed test_and_{}_bit()
Commit-ID: 61e02392d3c7ecac1f91c0a90a8043d67e081846 Gitweb: https://git.kernel.org/tip/61e02392d3c7ecac1f91c0a90a8043d67e081846 Author: Will Deacon AuthorDate: Tue, 13 Feb 2018 13:30:19 +0000 Committer: Ingo Molnar CommitDate: Tue, 13 Feb 2018 14:55:53 +0100 locking/atomic/bitops: Document and clarify ordering semantics for failed test_and_{}_bit() A test_and_{}_bit() operation fails if the value of the bit is such that the modification does not take place. For example, if test_and_set_bit() returns 1. In these cases, follow the behaviour of cmpxchg and allow the operation to be unordered. This also applies to test_and_set_bit_lock() if the lock is found to be taken already. Signed-off-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1518528619-20049-1-git-send-email-will.dea...@arm.com Signed-off-by: Ingo Molnar --- Documentation/atomic_bitops.txt | 7 ++++++- include/asm-generic/bitops/lock.h | 3 ++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Documentation/atomic_bitops.txt b/Documentation/atomic_bitops.txt index 5550bfdc..be70b32 100644 --- a/Documentation/atomic_bitops.txt +++ b/Documentation/atomic_bitops.txt @@ -58,7 +58,12 @@ Like with atomic_t, the rule of thumb is: - RMW operations that have a return value are fully ordered. -Except for test_and_set_bit_lock() which has ACQUIRE semantics and + - RMW operations that are conditional are unordered on FAILURE, + otherwise the above rules apply. In the case of test_and_{}_bit() operations, + if the bit in memory is unchanged by the operation then it is deemed to have + failed. + +Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics and clear_bit_unlock() which has RELEASE semantics. Since a platform only has a single means of achieving atomic operations diff --git a/include/asm-generic/bitops/lock.h b/include/asm-generic/bitops/lock.h index bc39757..67ab280 100644 --- a/include/asm-generic/bitops/lock.h +++ b/include/asm-generic/bitops/lock.h @@ -7,7 +7,8 @@ * @nr: Bit to set * @addr: Address to count from * - * This operation is atomic and provides acquire barrier semantics. + * This operation is atomic and provides acquire barrier semantics if + * the returned value is 0. * It can be used to implement bit locks. */ #define test_and_set_bit_lock(nr, addr) test_and_set_bit(nr, addr)
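In practice the conditional ordering matters most for bit locks built on these primitives. A minimal sketch (the wrapper names are invented; the bitops themselves are the ordinary kernel APIs):

/* Illustrative bit lock: ACQUIRE applies only to the winning attempt. */
static void example_bit_lock(unsigned long *word)
{
	/* Failed attempts may be unordered, which is fine while spinning. */
	while (test_and_set_bit_lock(0, word))
		cpu_relax();
	/* The successful attempt above had ACQUIRE semantics. */
}

static void example_bit_unlock(unsigned long *word)
{
	clear_bit_unlock(0, word);	/* RELEASE */
}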
[tip:x86/pti] nospec: Move array_index_nospec() parameter checking into separate macro
Commit-ID: 2963962dc910e57becfa0bb3df013b1d5c23179a Gitweb: https://git.kernel.org/tip/2963962dc910e57becfa0bb3df013b1d5c23179a Author: Will Deacon AuthorDate: Mon, 5 Feb 2018 14:16:06 +0000 Committer: Ingo Molnar CommitDate: Tue, 13 Feb 2018 14:22:44 +0100 nospec: Move array_index_nospec() parameter checking into separate macro For architectures providing their own implementation of array_index_mask_nospec() in asm/barrier.h, attempting to use WARN_ONCE() to complain about out-of-range parameters using WARN_ON() results in a mess of mutually-dependent include files. Rather than unpick the dependencies, simply have the core code in nospec.h perform the checking for us. Signed-off-by: Will Deacon Acked-by: Thomas Gleixner Cc: Dan Williams Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1517840166-15399-1-git-send-email-will.dea...@arm.com Signed-off-by: Ingo Molnar --- include/linux/nospec.h | 36 +++++++++++++++++++++--------------- 1 file changed, 21 insertions(+), 15 deletions(-) diff --git a/include/linux/nospec.h b/include/linux/nospec.h index b99bced..fbc98e2 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -20,20 +20,6 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, unsigned long size) { /* -* Warn developers about inappropriate array_index_nospec() usage. -* -* Even if the CPU speculates past the WARN_ONCE branch, the -* sign bit of @index is taken into account when generating the -* mask. -* -* This warning is compiled out when the compiler can infer that -* @index and @size are less than LONG_MAX. -*/ - if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, - "array_index_nospec() limited to range of [0, LONG_MAX]\n")) - return 0; - - /* * Always calculate and emit the mask even if the compiler * thinks the mask is not needed. The compiler does not take * into account the value of @index under speculation. @@ -44,6 +30,26 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, #endif /* + * Warn developers about inappropriate array_index_nospec() usage. + * + * Even if the CPU speculates past the WARN_ONCE branch, the + * sign bit of @index is taken into account when generating the + * mask. + * + * This warning is compiled out when the compiler can infer that + * @index and @size are less than LONG_MAX. + */ +#define array_index_mask_nospec_check(index, size) \ +({ \ + if (WARN_ONCE(index > LONG_MAX || size > LONG_MAX, \ + "array_index_nospec() limited to range of [0, LONG_MAX]\n")) \ + _mask = 0; \ + else \ + _mask = array_index_mask_nospec(index, size); \ + _mask; \ +}) + +/* * array_index_nospec - sanitize an array index after a bounds check * * For a code sequence like: * @@ -61,7 +67,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, ({ \ typeof(index) _i = (index); \ typeof(size) _s = (size); \ - unsigned long _mask = array_index_mask_nospec(_i, _s); \ + unsigned long _mask = array_index_mask_nospec_check(_i, _s);\ \ BUILD_BUG_ON(sizeof(_i) > sizeof(long));\ BUILD_BUG_ON(sizeof(_s) > sizeof(long));\
[tip:locking/urgent] locking/qspinlock: Ensure node is initialised before updating prev->next
Commit-ID: 95bcade33a8af38755c9b0636e36a36ad3789fe6
Gitweb: https://git.kernel.org/tip/95bcade33a8af38755c9b0636e36a36ad3789fe6
Author: Will Deacon
AuthorDate: Tue, 13 Feb 2018 13:22:56 +
Committer: Ingo Molnar
CommitDate: Tue, 13 Feb 2018 14:50:14 +0100

locking/qspinlock: Ensure node is initialised before updating prev->next

When a locker ends up queuing on the qspinlock locking slowpath, we initialise the relevant mcs node and publish it indirectly by updating the tail portion of the lock word using xchg_tail. If we find that there was a pre-existing locker in the queue, we subsequently update their ->next field to point at our node so that we are notified when it's our turn to take the lock. This can be roughly illustrated as follows:

/* Initialise the fields in node and encode a pointer to node in tail */
tail = initialise_node(node);

/*
 * Exchange tail into the lockword using an atomic read-modify-write
 * operation with release semantics
 */
old = xchg_tail(lock, tail);

/* If there was a pre-existing waiter ... */
if (old & _Q_TAIL_MASK) {
        prev = decode_tail(old);
        smp_read_barrier_depends();

        /* ... then update their ->next field to point to node. */
        WRITE_ONCE(prev->next, node);
}

The conditional update of prev->next therefore relies on the address dependency from the result of xchg_tail ensuring order against the prior initialisation of node. However, since the release semantics of the xchg_tail operation apply only to the write portion of the RmW, then this ordering is not guaranteed and it is possible for the CPU to return old before the writes to node have been published, consequently allowing us to point prev->next to an uninitialised node.

This patch fixes the problem by making the update of prev->next a RELEASE operation, which also removes the reliance on dependency ordering.

Signed-off-by: Will Deacon
Acked-by: Peter Zijlstra (Intel)
Cc: Linus Torvalds
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/1518528177-19169-2-git-send-email-will.dea...@arm.com
Signed-off-by: Ingo Molnar
---
 kernel/locking/qspinlock.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 38ece03..348c8ce 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -408,14 +408,15 @@ queue:
          */
         if (old & _Q_TAIL_MASK) {
                 prev = decode_tail(old);
+
                 /*
-                 * The above xchg_tail() is also a load of @lock which
-                 * generates, through decode_tail(), a pointer. The address
-                 * dependency matches the RELEASE of xchg_tail() such that
-                 * the subsequent access to @prev happens after.
+                 * We must ensure that the stores to @node are observed before
+                 * the write to prev->next. The address dependency from
+                 * xchg_tail is not sufficient to ensure this because the read
+                 * component of xchg_tail is unordered with respect to the
+                 * initialisation of @node.
                  */
-
-                WRITE_ONCE(prev->next, node);
+                smp_store_release(&prev->next, node);
 
                 pv_wait_node(node, prev);
                 arch_mcs_spin_lock_contended(&node->locked);
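The fix is an instance of the standard publication idiom; a minimal sketch (not the qspinlock code itself) of how a release store pairs with an acquire load:

struct node {
        int data;
        struct node *next;
};

/* Writer: initialise first, then publish with release semantics, so
 * the stores to *n cannot be reordered after the pointer update. */
static void publish(struct node **slot, struct node *n)
{
        n->data = 1;
        n->next = NULL;
        smp_store_release(slot, n);
}

/* Reader: an acquire load of the pointer guarantees the fields of *n
 * are observed as initialised. */
static int consume(struct node **slot)
{
        struct node *n = smp_load_acquire(slot);

        return n ? n->data : 0;
}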
[tip:locking/urgent] locking/qspinlock: Ensure node->count is updated before initialising node
Commit-ID: 11dc13224c975efcec96647a4768a6f1bb7a19a8
Gitweb: https://git.kernel.org/tip/11dc13224c975efcec96647a4768a6f1bb7a19a8
Author: Will Deacon
AuthorDate: Tue, 13 Feb 2018 13:22:57 +
Committer: Ingo Molnar
CommitDate: Tue, 13 Feb 2018 14:50:14 +0100

locking/qspinlock: Ensure node->count is updated before initialising node

When queuing on the qspinlock, the count field for the current CPU's head node is incremented. This needn't be atomic because locking in e.g. IRQ context is balanced and so an IRQ will return with node->count as it found it.

However, the compiler could in theory reorder the initialisation of node[idx] before the increment of the head node->count, causing an IRQ to overwrite the initialised node and potentially corrupt the lock state.

Avoid the potential for this harmful compiler reordering by placing a barrier() between the increment of the head node->count and the subsequent node initialisation.

Signed-off-by: Will Deacon
Acked-by: Peter Zijlstra (Intel)
Cc: Linus Torvalds
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/1518528177-19169-3-git-send-email-will.dea...@arm.com
Signed-off-by: Ingo Molnar
---
 kernel/locking/qspinlock.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 348c8ce..d880296 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -379,6 +379,14 @@ queue:
         tail = encode_tail(smp_processor_id(), idx);
 
         node += idx;
+
+        /*
+         * Ensure that we increment the head node->count before initialising
+         * the actual node. If the compiler is kind enough to reorder these
+         * stores, then an IRQ could overwrite our assignments.
+         */
+        barrier();
+
         node->locked = 0;
         node->next = NULL;
         pv_init_node(node);
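Reduced to its essentials, the pattern looks like this; the names below are illustrative rather than the actual qspinlock code:

struct mcs_node {
        int locked;
        struct mcs_node *next;
};

static struct mcs_node nodes[4];
static int count;

/* Claim the slot by bumping the nesting count, emit a compiler
 * barrier, and only then reinitialise the node. Without barrier(),
 * the compiler could sink the increment below the stores, letting an
 * interrupt pick the same slot and have its state overwritten. */
static struct mcs_node *claim_node(void)
{
        struct mcs_node *node = &nodes[count];

        count++;
        barrier();
        node->locked = 0;
        node->next = NULL;
        return node;
}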
[tip:locking/urgent] locking/core: Remove break_lock field when CONFIG_GENERIC_LOCKBREAK=y
Commit-ID: d89c70356acf11b7cf47ca5cfcafae5062a85451
Gitweb: https://git.kernel.org/tip/d89c70356acf11b7cf47ca5cfcafae5062a85451
Author: Will Deacon
AuthorDate: Tue, 28 Nov 2017 18:42:19 +
Committer: Ingo Molnar
CommitDate: Tue, 12 Dec 2017 11:24:01 +0100

locking/core: Remove break_lock field when CONFIG_GENERIC_LOCKBREAK=y

When CONFIG_GENERIC_LOCKBREAK=y, locking structures grow an extra int ->break_lock field which is used to implement raw_spin_is_contended() by setting the field to 1 when waiting on a lock and clearing it to zero when holding a lock. However, there are a few problems with this approach:

- There is a write-write race between a CPU successfully taking the lock (and subsequently writing break_lock = 0) and a waiter waiting on the lock (and subsequently writing break_lock = 1). This could result in a contended lock being reported as uncontended and vice-versa.

- On machines with store buffers, nothing guarantees that the writes to break_lock are visible to other CPUs at any particular time.

- READ_ONCE/WRITE_ONCE are not used, so the field is potentially susceptible to harmful compiler optimisations.

Consequently, the usefulness of this field is unclear and we'd be better off removing it and allowing architectures to implement raw_spin_is_contended() by providing a definition of arch_spin_is_contended(), as they can when CONFIG_GENERIC_LOCKBREAK=n.

Signed-off-by: Will Deacon
Acked-by: Peter Zijlstra
Cc: Heiko Carstens
Cc: Linus Torvalds
Cc: Martin Schwidefsky
Cc: Sebastian Ott
Cc: Thomas Gleixner
Link: http://lkml.kernel.org/r/1511894539-7988-3-git-send-email-will.dea...@arm.com
Signed-off-by: Ingo Molnar
---
 include/linux/rwlock_types.h   | 3 ---
 include/linux/spinlock.h       | 5 -----
 include/linux/spinlock_types.h | 3 ---
 kernel/locking/spinlock.c      | 9 +--------
 4 files changed, 1 insertion(+), 19 deletions(-)

diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h
index cc0072e..857a72c 100644
--- a/include/linux/rwlock_types.h
+++ b/include/linux/rwlock_types.h
@@ -10,9 +10,6 @@
  */
 typedef struct {
         arch_rwlock_t raw_lock;
-#ifdef CONFIG_GENERIC_LOCKBREAK
-        unsigned int break_lock;
-#endif
 #ifdef CONFIG_DEBUG_SPINLOCK
         unsigned int magic, owner_cpu;
         void *owner;

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index a391861..3bf2735 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -107,16 +107,11 @@ do { \
 #define raw_spin_is_locked(lock) arch_spin_is_locked(&(lock)->raw_lock)
 
-#ifdef CONFIG_GENERIC_LOCKBREAK
-#define raw_spin_is_contended(lock) ((lock)->break_lock)
-#else
-
 #ifdef arch_spin_is_contended
 #define raw_spin_is_contended(lock) arch_spin_is_contended(&(lock)->raw_lock)
 #else
 #define raw_spin_is_contended(lock) (((void)(lock), 0))
 #endif /*arch_spin_is_contended*/
-#endif
 
 /*
  * This barrier must provide two things:

diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h
index 73548eb..24b4e6f 100644
--- a/include/linux/spinlock_types.h
+++ b/include/linux/spinlock_types.h
@@ -19,9 +19,6 @@
 typedef struct raw_spinlock {
         arch_spinlock_t raw_lock;
-#ifdef CONFIG_GENERIC_LOCKBREAK
-        unsigned int break_lock;
-#endif
 #ifdef CONFIG_DEBUG_SPINLOCK
         unsigned int magic, owner_cpu;
         void *owner;

diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
index 0ebb253..936f3d1 100644
--- a/kernel/locking/spinlock.c
+++ b/kernel/locking/spinlock.c
@@ -66,12 +66,8 @@ void __lockfunc __raw_##op##_lock(locktype##_t *lock) \
                         break; \
                 preempt_enable(); \
                 \
-                if (!(lock)->break_lock) \
-                        (lock)->break_lock = 1; \
-                \
                 arch_##op##_relax(&lock->raw_lock); \
         } \
-        (lock)->break_lock = 0; \
 } \
 \
 unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \
@@ -86,12 +82,9 @@ unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \
                 local_irq_restore(flags); \
                 preempt_enable(); \
                 \
-                if (!(lock)->break_lock
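The write-write race in the first bullet is easiest to see as an interleaving; a sketch of one problematic schedule:

/*
 * CPU0 (new lock holder)          CPU1 (waiter)
 * ----------------------          ----------------------
 * lock->break_lock = 0;
 *                                 lock->break_lock = 1;
 * <holds the lock>                <still spinning>
 *
 * Neither store is atomic or ordered, so either write can land last:
 * raw_spin_is_contended() may report a contended lock as uncontended
 * or vice versa, and a store buffer can delay visibility arbitrarily.
 */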
[tip:locking/urgent] locking/core: Fix deadlock during boot on systems with GENERIC_LOCKBREAK
Commit-ID: f87f3a328dbbb3e79dd53e7e889ced9222512649
Gitweb: https://git.kernel.org/tip/f87f3a328dbbb3e79dd53e7e889ced9222512649
Author: Will Deacon
AuthorDate: Tue, 28 Nov 2017 18:42:18 +
Committer: Ingo Molnar
CommitDate: Tue, 12 Dec 2017 11:24:01 +0100

locking/core: Fix deadlock during boot on systems with GENERIC_LOCKBREAK

Commit: a8a217c22116 ("locking/core: Remove {read,spin,write}_can_lock()") removed the definition of raw_spin_can_lock(), causing the GENERIC_LOCKBREAK spin_lock() routines to poll the ->break_lock field when waiting on a lock.

This has been reported to cause a deadlock during boot on s390, because the ->break_lock field is also set by the waiters, and can potentially remain set indefinitely if no other CPUs come in to take the lock after it has been released.

This patch removes the explicit spinning on ->break_lock from the waiters, instead relying on the outer trylock() operation to determine when the lock is available.

Reported-by: Sebastian Ott
Tested-by: Sebastian Ott
Signed-off-by: Will Deacon
Acked-by: Peter Zijlstra
Cc: Heiko Carstens
Cc: Linus Torvalds
Cc: Martin Schwidefsky
Cc: Thomas Gleixner
Fixes: a8a217c22116 ("locking/core: Remove {read,spin,write}_can_lock()")
Link: http://lkml.kernel.org/r/1511894539-7988-2-git-send-email-will.dea...@arm.com
Signed-off-by: Ingo Molnar
---
 kernel/locking/spinlock.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c
index 1fd1a75..0ebb253 100644
--- a/kernel/locking/spinlock.c
+++ b/kernel/locking/spinlock.c
@@ -68,8 +68,8 @@ void __lockfunc __raw_##op##_lock(locktype##_t *lock) \
                 \
                 if (!(lock)->break_lock) \
                         (lock)->break_lock = 1; \
-                while ((lock)->break_lock) \
-                        arch_##op##_relax(&lock->raw_lock); \
+                \
+                arch_##op##_relax(&lock->raw_lock); \
         } \
         (lock)->break_lock = 0; \
 } \
@@ -88,8 +88,8 @@ unsigned long __lockfunc __raw_##op##_lock_irqsave(locktype##_t *lock) \
                 \
                 if (!(lock)->break_lock) \
                         (lock)->break_lock = 1; \
-                while ((lock)->break_lock) \
-                        arch_##op##_relax(&lock->raw_lock); \
+                \
+                arch_##op##_relax(&lock->raw_lock); \
         } \
         (lock)->break_lock = 0; \
         return flags; \
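A simplified sketch of the resulting loop shape (the real code is generated by a macro and, until the follow-up patch above, still updates break_lock; the names here are illustrative):

static void slow_lock(locktype_t *lock)
{
        for (;;) {
                preempt_disable();
                if (arch_trylock(&lock->raw_lock))
                        break;          /* the outer trylock decides */
                preempt_enable();

                /* No inner polling loop any more: relax once and retry
                 * the trylock, so a stale break_lock value can no
                 * longer wedge the waiter. */
                arch_relax(&lock->raw_lock);
        }
}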
[tip:irq/urgent] irqdesc: Use bool return type instead of int
Commit-ID: 4ce413d1840b25b101be3c0559161db8891f3360
Gitweb: https://git.kernel.org/tip/4ce413d1840b25b101be3c0559161db8891f3360
Author: Will Deacon
AuthorDate: Fri, 1 Dec 2017 15:29:39 +
Committer: Thomas Gleixner
CommitDate: Mon, 4 Dec 2017 20:51:12 +0100

irqdesc: Use bool return type instead of int

The irq_balancing_disabled and irq_is_percpu{,_devid} functions are clearly intended to return bool like the functions in kernel/irq/settings.h, but actually return an int containing a masked value of desc->status_use_accessors. This can lead to subtle breakage if, for example, the return value is subsequently truncated when assigned to a narrower type.

As Linus points out:

| In particular, what can (and _has_ happened) is that people end up
| using these functions that return true or false, and they assign the
| result to something like a bitfield (or a char) or whatever.
|
| And the code looks *obviously* correct, when you have things like
|
|     dev->percpu = irq_is_percpu_devid(dev->irq);
|
| and that "percpu" thing is just one status bit among many. It may even
| *work*, because maybe that "percpu" flag ends up not being all that
| important, or it just happens to never be set on the particular
| hardware that people end up testing.
|
| But while it looks obviously correct, and might even work, it's really
| fundamentally broken. Because that "true or false" function didn't
| actually return 0/1, it returned 0 or 0x2.
|
| And 0x2 may not fit in a bitmask or a "char" or whatever.

Fix the problem by consistently using bool as the return type for these functions.

Reported-by: Linus Torvalds
Signed-off-by: Will Deacon
Signed-off-by: Thomas Gleixner
Cc: marc.zyng...@arm.com
Link: https://lkml.kernel.org/r/1512142179-24616-1-git-send-email-will.dea...@arm.com
---
 include/linux/irqdesc.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h
index dd41895..39fb370 100644
--- a/include/linux/irqdesc.h
+++ b/include/linux/irqdesc.h
@@ -230,7 +230,7 @@ irq_set_chip_handler_name_locked(struct irq_data *data, struct irq_chip *chip,
         data->chip = chip;
 }
 
-static inline int irq_balancing_disabled(unsigned int irq)
+static inline bool irq_balancing_disabled(unsigned int irq)
 {
         struct irq_desc *desc;
 
@@ -238,7 +238,7 @@ static inline int irq_balancing_disabled(unsigned int irq)
         return desc->status_use_accessors & IRQ_NO_BALANCING_MASK;
 }
 
-static inline int irq_is_percpu(unsigned int irq)
+static inline bool irq_is_percpu(unsigned int irq)
 {
         struct irq_desc *desc;
 
@@ -246,7 +246,7 @@ static inline int irq_is_percpu(unsigned int irq)
         return desc->status_use_accessors & IRQ_PER_CPU;
 }
 
-static inline int irq_is_percpu_devid(unsigned int irq)
+static inline bool irq_is_percpu_devid(unsigned int irq)
 {
         struct irq_desc *desc;
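Linus's point is easy to reproduce: assigning a masked int to a one-bit bitfield silently drops the flag. A minimal sketch with hypothetical names:

struct dev_state {
        unsigned int percpu:1;
};

/* Returns 0 or 0x2 (the raw status bit), not 0 or 1. */
static int irq_flag_as_int(unsigned int status)
{
        return status & 0x2;
}

static void probe(struct dev_state *dev, unsigned int status)
{
        /* 0x2 is truncated to its low bit: the flag is stored as 0. */
        dev->percpu = irq_flag_as_int(status);
}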
[tip:locking/core] locking/qrwlock: Prevent slowpath writers getting held up by fastpath
Commit-ID: d133166146333e1f13fc81c0e6c43c8d99290a8a
Gitweb: https://git.kernel.org/tip/d133166146333e1f13fc81c0e6c43c8d99290a8a
Author: Will Deacon
AuthorDate: Thu, 12 Oct 2017 13:20:51 +0100
Committer: Ingo Molnar
CommitDate: Wed, 25 Oct 2017 10:57:25 +0200

locking/qrwlock: Prevent slowpath writers getting held up by fastpath

When a prospective writer takes the qrwlock locking slowpath due to the lock being held, it attempts to cmpxchg the wmode field from 0 to _QW_WAITING so that concurrent lockers also take the slowpath and queue on the spinlock accordingly, allowing the lockers to drain.

Unfortunately, this isn't fair, because a fastpath writer that comes in after the lock is made available but before the _QW_WAITING flag is set can effectively jump the queue. If there is a steady stream of prospective writers, then the waiter will be held off indefinitely.

This patch restores fairness by separating _QW_WAITING and _QW_LOCKED into two distinct fields: _QW_LOCKED continues to occupy the bottom byte of the lockword so that it can be cleared unconditionally when unlocking, but _QW_WAITING now occupies what used to be the bottom bit of the reader count. This then forces the slow-path for concurrent lockers.

Tested-by: Waiman Long
Tested-by: Jeremy Linton
Tested-by: Adam Wallis
Tested-by: Jan Glauber
Signed-off-by: Will Deacon
Acked-by: Peter Zijlstra
Cc: Boqun Feng
Cc: jeremy.lin...@arm.com
Cc: Linus Torvalds
Cc: Paul E. McKenney
Cc: Thomas Gleixner
Cc: linux-arm-ker...@lists.infradead.org
Link: http://lkml.kernel.org/r/1507810851-306-6-git-send-email-will.dea...@arm.com
Signed-off-by: Ingo Molnar
---
 include/asm-generic/qrwlock.h       | 23 +++++------------------
 include/asm-generic/qrwlock_types.h |  8 ++++----
 kernel/locking/qrwlock.c            | 20 +++++---------------
 3 files changed, 14 insertions(+), 37 deletions(-)

diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index c716b02..0f7062b 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -26,24 +26,11 @@
 /*
  * Writer states & reader shift and bias.
- *
- *       | +0 | +1 | +2 | +3 |
- *   ----+----+----+----+----+
- *    LE | 78 | 56 | 34 | 12 | 0x12345678
- *   ----+----+----+----+----+
- *       | wr |      rd      |
- *       +----+----+----+----+
- *
- *   ----+----+----+----+----+
- *    BE | 12 | 34 | 56 | 78 | 0x12345678
- *   ----+----+----+----+----+
- *       |      rd      | wr |
- *       +----+----+----+----+
  */
-#define _QW_WAITING 1       /* A writer is waiting      */
-#define _QW_LOCKED  0xff    /* A writer holds the lock  */
-#define _QW_WMASK   0xff    /* Writer mask              */
-#define _QR_SHIFT   8       /* Reader count shift       */
+#define _QW_WAITING 0x100   /* A writer is waiting      */
+#define _QW_LOCKED  0x0ff   /* A writer holds the lock  */
+#define _QW_WMASK   0x1ff   /* Writer mask              */
+#define _QR_SHIFT   9       /* Reader count shift       */
 #define _QR_BIAS    (1U << _QR_SHIFT)
 
 /*
@@ -134,7 +121,7 @@ static inline void queued_read_unlock(struct qrwlock *lock)
  */
 static inline void queued_write_unlock(struct qrwlock *lock)
 {
-        smp_store_release(&lock->wmode, 0);
+        smp_store_release(&lock->wlocked, 0);
 }
 
 /*

diff --git a/include/asm-generic/qrwlock_types.h b/include/asm-generic/qrwlock_types.h
index 507f2dc..8af752a 100644
--- a/include/asm-generic/qrwlock_types.h
+++ b/include/asm-generic/qrwlock_types.h
@@ -13,11 +13,11 @@ typedef struct qrwlock {
         atomic_t cnts;
         struct {
 #ifdef __LITTLE_ENDIAN
-                u8 wmode;       /* Writer mode   */
-                u8 rcnts[3];    /* Reader counts */
+                u8 wlocked;     /* Locked for write? */
+                u8 __lstate[3];
 #else
-                u8 rcnts[3];    /* Reader counts */
-                u8 wmode;       /* Writer mode   */
+                u8 __lstate[3];
+                u8 wlocked;     /* Locked for write? */
 #endif
         };
 };

diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index 5825e0f..c7471c3 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -39,8 +39,7 @@ void queued_read_lock_slowpath(struct qrwlock *lock)
          * so spin with ACQUIRE semantics until the lock is available
          * without waiting in the queue.
          */
-        atomic_cond_read_acquire(&lock->cnts, (VAL & _QW_WMASK)
-                                 != _QW_LOCKED);
+        atomic_cond_read_acquire(&lock->cnts, !(VAL & _QW_LOCKED));
         return;
         }
         atomic_sub(_QR_BIAS, &lock->cnts);
@@ -56,7 +55,7 @@ void queued_read_lock_slowpath(struct qrwlock *lock)
          * that accesses can't leak
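The resulting 32-bit lockword layout, sketched from the new constants:

/*
 *  31                          9   8   7            0
 * +----------------------------+---+----------------+
 * |       reader count         | W |    wlocked     |
 * +----------------------------+---+----------------+
 *                                ^         ^
 *             _QW_WAITING (0x100)    _QW_LOCKED (0xff)
 *
 * wlocked stays a whole byte so write-unlock remains a plain byte
 * store; the waiting bit moves out of that byte so a fastpath writer
 * can no longer clobber a queued waiter's claim when it unlocks.
 */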
[tip:locking/core] locking/atomic: Add atomic_cond_read_acquire()
Commit-ID: 4df714be4dcf40bfb0d4af0f851a6e1977afa02e
Gitweb: https://git.kernel.org/tip/4df714be4dcf40bfb0d4af0f851a6e1977afa02e
Author: Will Deacon
AuthorDate: Thu, 12 Oct 2017 13:20:48 +0100
Committer: Ingo Molnar
CommitDate: Wed, 25 Oct 2017 10:57:24 +0200

locking/atomic: Add atomic_cond_read_acquire()

smp_cond_load_acquire() provides a way to spin on a variable with acquire semantics until some conditional expression involving the variable is satisfied. Architectures such as arm64 can potentially enter a low-power state, waking up only when the value of the variable changes, which reduces the system impact of tight polling loops.

This patch makes the same interface available to users of atomic_t, atomic64_t and atomic_long_t, rather than require messy accesses to the structure internals.

Signed-off-by: Will Deacon
Acked-by: Peter Zijlstra
Cc: Boqun Feng
Cc: jeremy.lin...@arm.com
Cc: Linus Torvalds
Cc: Paul E. McKenney
Cc: Thomas Gleixner
Cc: Waiman Long
Cc: linux-arm-ker...@lists.infradead.org
Link: http://lkml.kernel.org/r/1507810851-306-3-git-send-email-will.dea...@arm.com
Signed-off-by: Ingo Molnar
---
 include/asm-generic/atomic-long.h | 3 +++
 include/linux/atomic.h            | 4 ++++
 2 files changed, 7 insertions(+)

diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h
index 288cc9e..f2d97b7 100644
--- a/include/asm-generic/atomic-long.h
+++ b/include/asm-generic/atomic-long.h
@@ -243,4 +243,7 @@ static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
 #define atomic_long_inc_not_zero(l) \
         ATOMIC_LONG_PFX(_inc_not_zero)((ATOMIC_LONG_PFX(_t) *)(l))
 
+#define atomic_long_cond_read_acquire(v, c) \
+        ATOMIC_LONG_PFX(_cond_read_acquire)((ATOMIC_LONG_PFX(_t) *)(v), (c))
+
 #endif /* _ASM_GENERIC_ATOMIC_LONG_H */

diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index 40d6bfe..0aeb2b3 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -653,6 +653,8 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 }
 #endif
 
+#define atomic_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c))
+
 #ifdef CONFIG_GENERIC_ATOMIC64
 #include <asm-generic/atomic64.h>
 #endif
@@ -1072,6 +1074,8 @@ static inline long long atomic64_fetch_andnot_release(long long i, atomic64_t *v)
 }
 #endif
 
+#define atomic64_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c))
+
 #include <asm-generic/atomic-long.h>
 
 #endif /* _LINUX_ATOMIC_H */
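Usage mirrors smp_cond_load_acquire(), with VAL naming the freshly loaded value inside the condition expression; a small hypothetical consumer:

#include <linux/atomic.h>

static atomic_t ready = ATOMIC_INIT(0);

/* Spin (or, on arm64, wait in a low-power state) until another CPU
 * sets bit 0, with acquire ordering against its earlier stores. */
static void wait_for_ready(void)
{
        atomic_cond_read_acquire(&ready, VAL & 1);
}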
[tip:locking/core] locking/qrwlock, arm64: Move rwlock implementation over to qrwlocks
Commit-ID: 087133ac90763cd339b6b67f2998f87dcc136c52
Gitweb: https://git.kernel.org/tip/087133ac90763cd339b6b67f2998f87dcc136c52
Author: Will Deacon
AuthorDate: Thu, 12 Oct 2017 13:20:50 +0100
Committer: Ingo Molnar
CommitDate: Wed, 25 Oct 2017 10:57:25 +0200

locking/qrwlock, arm64: Move rwlock implementation over to qrwlocks

Now that the qrwlock can make use of WFE, remove our homebrewed rwlock code in favour of the generic queued implementation.

Tested-by: Waiman Long
Tested-by: Jeremy Linton
Tested-by: Adam Wallis
Tested-by: Jan Glauber
Signed-off-by: Will Deacon
Acked-by: Peter Zijlstra
Cc: jeremy.lin...@arm.com
Cc: Linus Torvalds
Cc: Thomas Gleixner
Cc: boqun.f...@gmail.com
Cc: linux-arm-ker...@lists.infradead.org
Cc: paul...@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1507810851-306-5-git-send-email-will.dea...@arm.com
Signed-off-by: Ingo Molnar
---
 arch/arm64/Kconfig                      |  17 ++++
 arch/arm64/include/asm/Kbuild           |   1 +
 arch/arm64/include/asm/spinlock.h       | 164 +-------------------------------
 arch/arm64/include/asm/spinlock_types.h |   6 +-
 4 files changed, 20 insertions(+), 168 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 0df64a6..df02ad9 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -22,7 +22,24 @@ config ARM64
         select ARCH_HAS_STRICT_MODULE_RWX
         select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
         select ARCH_HAVE_NMI_SAFE_CMPXCHG if ACPI_APEI_SEA
+        select ARCH_INLINE_READ_LOCK if !PREEMPT
+        select ARCH_INLINE_READ_LOCK_BH if !PREEMPT
+        select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPT
+        select ARCH_INLINE_READ_LOCK_IRQSAVE if !PREEMPT
+        select ARCH_INLINE_READ_UNLOCK if !PREEMPT
+        select ARCH_INLINE_READ_UNLOCK_BH if !PREEMPT
+        select ARCH_INLINE_READ_UNLOCK_IRQ if !PREEMPT
+        select ARCH_INLINE_READ_UNLOCK_IRQRESTORE if !PREEMPT
+        select ARCH_INLINE_WRITE_LOCK if !PREEMPT
+        select ARCH_INLINE_WRITE_LOCK_BH if !PREEMPT
+        select ARCH_INLINE_WRITE_LOCK_IRQ if !PREEMPT
+        select ARCH_INLINE_WRITE_LOCK_IRQSAVE if !PREEMPT
+        select ARCH_INLINE_WRITE_UNLOCK if !PREEMPT
+        select ARCH_INLINE_WRITE_UNLOCK_BH if !PREEMPT
+        select ARCH_INLINE_WRITE_UNLOCK_IRQ if !PREEMPT
+        select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE if !PREEMPT
         select ARCH_USE_CMPXCHG_LOCKREF
+        select ARCH_USE_QUEUED_RWLOCKS
         select ARCH_SUPPORTS_MEMORY_FAILURE
         select ARCH_SUPPORTS_ATOMIC_RMW
         select ARCH_SUPPORTS_NUMA_BALANCING

diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 2326e39..e63d0a8 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -16,6 +16,7 @@ generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += msi.h
 generic-y += preempt.h
+generic-y += qrwlock.h
 generic-y += rwsem.h
 generic-y += segment.h
 generic-y += serial.h

diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index aa51a38..fdb827c 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -137,169 +137,7 @@ static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 }
 #define arch_spin_is_contended arch_spin_is_contended
 
-/*
- * Write lock implementation.
- *
- * Write locks set bit 31. Unlocking, is done by writing 0 since the lock is
- * exclusively held.
- *
- * The memory barriers are implicit with the load-acquire and store-release
- * instructions.
- */
-
-static inline void arch_write_lock(arch_rwlock_t *rw)
-{
-        unsigned int tmp;
-
-        asm volatile(ARM64_LSE_ATOMIC_INSN(
-        /* LL/SC */
-        "       sevl\n"
-        "1:     wfe\n"
-        "2:     ldaxr   %w0, %1\n"
-        "       cbnz    %w0, 1b\n"
-        "       stxr    %w0, %w2, %1\n"
-        "       cbnz    %w0, 2b\n"
-        __nops(1),
-        /* LSE atomics */
-        "1:     mov     %w0, wzr\n"
-        "2:     casa    %w0, %w2, %1\n"
-        "       cbz     %w0, 3f\n"
-        "       ldxr    %w0, %1\n"
-        "       cbz     %w0, 2b\n"
-        "       wfe\n"
-        "       b       1b\n"
-        "3:")
-        : "=&r" (tmp), "+Q" (rw->lock)
-        : "r" (0x80000000)
-        : "memory");
-}
-
-static inline int arch_write_trylock(arch_rwlock_t *rw)
-{
-        unsigned int tmp;
-
-        asm volatile(ARM64_LSE_ATOMIC_INSN(
-        /* LL/SC */
-        "1:     ldaxr   %w0, %1\n"
-        "       cbnz    %w0, 2f\n"
-        "       stxr    %w0, %w2, %1\n"
-        "       cbnz    %w0, 1b\n"
-        "2:",
-        /* LSE atomics */
-        "       mov     %w0, wzr\n"
-        "       casa    %w0, %w2, %1\n"
-        __nops(2))
-        : "=&r" (tmp), "+Q" (rw->lock)
-        : "r" (0x80000000)
-        : "memory");
-
-        return !tmp;
-}
-
-static inline void arch_write_unlock(arch_rwlock_t *rw)
-{
-        asm volatile(ARM64_LSE_ATOMIC_INSN(
-        "       stlr    wzr, %0",
-        "
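The Kbuild line is what wires the generic implementation in: "generic-y += qrwlock.h" causes the build to emit a one-line wrapper so that <asm/qrwlock.h> resolves to the generic header. A sketch of the generated file, assuming the usual Kbuild behaviour:

/* arch/arm64/include/generated/asm/qrwlock.h (generated at build time) */
#include <asm-generic/qrwlock.h>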
[tip:locking/core] locking/qrwlock: Use atomic_cond_read_acquire() when spinning in qrwlock
Commit-ID: b519b56e378ee82caf9b079b04f5db87dedc3251
Gitweb: https://git.kernel.org/tip/b519b56e378ee82caf9b079b04f5db87dedc3251
Author: Will Deacon
AuthorDate: Thu, 12 Oct 2017 13:20:49 +0100
Committer: Ingo Molnar
CommitDate: Wed, 25 Oct 2017 10:57:24 +0200

locking/qrwlock: Use atomic_cond_read_acquire() when spinning in qrwlock

The qrwlock slowpaths involve spinning when either a prospective reader is waiting for a concurrent writer to drain, or a prospective writer is waiting for concurrent readers to drain. In both of these situations, atomic_cond_read_acquire() can be used to avoid busy-waiting and make use of any backoff functionality provided by the architecture.

This patch replaces the open-code loops and rspin_until_writer_unlock() implementation with atomic_cond_read_acquire(). The write mode transition zero to _QW_WAITING is left alone, since (a) this doesn't need acquire semantics and (b) should be fast.

Tested-by: Waiman Long
Tested-by: Jeremy Linton
Tested-by: Adam Wallis
Tested-by: Jan Glauber
Signed-off-by: Will Deacon
Acked-by: Peter Zijlstra
Cc: Boqun Feng
Cc: jeremy.lin...@arm.com
Cc: Linus Torvalds
Cc: Paul E. McKenney
Cc: Thomas Gleixner
Cc: linux-arm-ker...@lists.infradead.org
Link: http://lkml.kernel.org/r/1507810851-306-4-git-send-email-will.dea...@arm.com
Signed-off-by: Ingo Molnar
---
 include/asm-generic/qrwlock.h |  4 ++--
 kernel/locking/qrwlock.c      | 50 ++++++++++++--------------------------------
 2 files changed, 14 insertions(+), 40 deletions(-)

diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index 02c0a76..c716b02 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -49,7 +49,7 @@
 /*
  * External function declarations
  */
-extern void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts);
+extern void queued_read_lock_slowpath(struct qrwlock *lock);
 extern void queued_write_lock_slowpath(struct qrwlock *lock);
 
 /**
@@ -100,7 +100,7 @@ static inline void queued_read_lock(struct qrwlock *lock)
         return;
 
         /* The slowpath will decrement the reader count, if necessary. */
-        queued_read_lock_slowpath(lock, cnts);
+        queued_read_lock_slowpath(lock);
 }
 
 /**

diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index 1af791e..5825e0f 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -24,28 +24,10 @@
 #include
 
 /**
- * rspin_until_writer_unlock - inc reader count & spin until writer is gone
- * @lock : Pointer to queue rwlock structure
- * @writer: Current queue rwlock writer status byte
- *
- * In interrupt context or at the head of the queue, the reader will just
- * increment the reader count & wait until the writer releases the lock.
- */
-static __always_inline void
-rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
-{
-        while ((cnts & _QW_WMASK) == _QW_LOCKED) {
-                cpu_relax();
-                cnts = atomic_read_acquire(&lock->cnts);
-        }
-}
-
-/**
  * queued_read_lock_slowpath - acquire read lock of a queue rwlock
  * @lock: Pointer to queue rwlock structure
- * @cnts: Current qrwlock lock value
  */
-void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
+void queued_read_lock_slowpath(struct qrwlock *lock)
 {
         /*
          * Readers come here when they cannot get the lock without waiting
@@ -53,13 +35,12 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
         if (unlikely(in_interrupt())) {
                 /*
                  * Readers in interrupt context will get the lock immediately
-                 * if the writer is just waiting (not holding the lock yet).
-                 * The rspin_until_writer_unlock() function returns immediately
-                 * in this case. Otherwise, they will spin (with ACQUIRE
-                 * semantics) until the lock is available without waiting in
-                 * the queue.
+                 * if the writer is just waiting (not holding the lock yet),
+                 * so spin with ACQUIRE semantics until the lock is available
+                 * without waiting in the queue.
                  */
-                rspin_until_writer_unlock(lock, cnts);
+                atomic_cond_read_acquire(&lock->cnts, (VAL & _QW_WMASK)
+                                         != _QW_LOCKED);
                 return;
         }
         atomic_sub(_QR_BIAS, &lock->cnts);
@@ -68,14 +49,14 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
          * Put the reader into the wait queue
          */
         arch_spin_lock(&lock->wait_lock);
+        atomic_add(_QR_BIAS, &lock->cnts);
 
         /*
          * The ACQUIRE semantics of the following spinning code ensure
          * that accesses can't leak upwards out of our subsequent critical
          * section in the case that the lock is currently held for write.
          */
-        cnts = atomic_fetch_add_acquire(_QR_BIAS, &lock->cnts);
-        rspin_until_writer_unlock(lock, cnts);
+
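The shape of the change in the reader slowpath, reduced to its essentials (a sketch extracted from the diff, not the complete function):

/* Before: add the bias and poll by hand. */
cnts = atomic_fetch_add_acquire(_QR_BIAS, &lock->cnts);
while ((cnts & _QW_WMASK) == _QW_LOCKED) {
        cpu_relax();
        cnts = atomic_read_acquire(&lock->cnts);
}

/* After: add the bias, then let the architecture wait efficiently. */
atomic_add(_QR_BIAS, &lock->cnts);
atomic_cond_read_acquire(&lock->cnts, (VAL & _QW_WMASK) != _QW_LOCKED);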
[tip:locking/core] locking/qrwlock: Use 'struct qrwlock' instead of 'struct __qrwlock'
Commit-ID: e0d02285f16e8d5810f3d5d5e8a5886ca0015d3b
Gitweb: https://git.kernel.org/tip/e0d02285f16e8d5810f3d5d5e8a5886ca0015d3b
Author: Will Deacon
AuthorDate: Thu, 12 Oct 2017 13:20:47 +0100
Committer: Ingo Molnar
CommitDate: Wed, 25 Oct 2017 10:57:24 +0200

locking/qrwlock: Use 'struct qrwlock' instead of 'struct __qrwlock'

There's no good reason to keep the internal structure of struct qrwlock hidden from qrwlock.h, particularly as it's actually needed for unlock and ends up being abstracted independently behind the __qrwlock_write_byte() function.

Stop pretending we can hide this stuff, and move the __qrwlock definition into qrwlock, removing the __qrwlock_write_byte() nastiness and using the same struct definition everywhere instead.

Signed-off-by: Will Deacon
Acked-by: Peter Zijlstra
Cc: Boqun Feng
Cc: jeremy.lin...@arm.com
Cc: Linus Torvalds
Cc: Paul E. McKenney
Cc: Thomas Gleixner
Cc: Waiman Long
Cc: linux-arm-ker...@lists.infradead.org
Link: http://lkml.kernel.org/r/1507810851-306-2-git-send-email-will.dea...@arm.com
Signed-off-by: Ingo Molnar
---
 include/asm-generic/qrwlock.h       | 12 +-----------
 include/asm-generic/qrwlock_types.h | 15 +++++++++++++--
 kernel/locking/qrwlock.c            | 26 ++------------------------
 3 files changed, 16 insertions(+), 37 deletions(-)

diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index 5092532..02c0a76 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -129,22 +129,12 @@ static inline void queued_read_unlock(struct qrwlock *lock)
 }
 
 /**
- * __qrwlock_write_byte - retrieve the write byte address of a queue rwlock
- * @lock : Pointer to queue rwlock structure
- * Return: the write byte address of a queue rwlock
- */
-static inline u8 *__qrwlock_write_byte(struct qrwlock *lock)
-{
-        return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN);
-}
-
-/**
  * queued_write_unlock - release write lock of a queue rwlock
  * @lock : Pointer to queue rwlock structure
  */
 static inline void queued_write_unlock(struct qrwlock *lock)
 {
-        smp_store_release(__qrwlock_write_byte(lock), 0);
+        smp_store_release(&lock->wmode, 0);
 }
 
 /*

diff --git a/include/asm-generic/qrwlock_types.h b/include/asm-generic/qrwlock_types.h
index 0abc6b6..507f2dc 100644
--- a/include/asm-generic/qrwlock_types.h
+++ b/include/asm-generic/qrwlock_types.h
@@ -9,12 +9,23 @@
  */
 typedef struct qrwlock {
-        atomic_t cnts;
+        union {
+                atomic_t cnts;
+                struct {
+#ifdef __LITTLE_ENDIAN
+                        u8 wmode;       /* Writer mode   */
+                        u8 rcnts[3];    /* Reader counts */
+#else
+                        u8 rcnts[3];    /* Reader counts */
+                        u8 wmode;       /* Writer mode   */
+#endif
+                };
+        };
         arch_spinlock_t wait_lock;
 } arch_rwlock_t;
 
 #define __ARCH_RW_LOCK_UNLOCKED {       \
-        .cnts = ATOMIC_INIT(0),         \
+        { .cnts = ATOMIC_INIT(0), },    \
         .wait_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
 }

diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index 2655f26..1af791e 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -23,26 +23,6 @@
 #include
 #include
 
-/*
- * This internal data structure is used for optimizing access to some of
- * the subfields within the atomic_t cnts.
- */
-struct __qrwlock {
-        union {
-                atomic_t cnts;
-                struct {
-#ifdef __LITTLE_ENDIAN
-                        u8 wmode;       /* Writer mode   */
-                        u8 rcnts[3];    /* Reader counts */
-#else
-                        u8 rcnts[3];    /* Reader counts */
-                        u8 wmode;       /* Writer mode   */
-#endif
-                };
-        };
-        arch_spinlock_t lock;
-};
-
 /**
  * rspin_until_writer_unlock - inc reader count & spin until writer is gone
  * @lock : Pointer to queue rwlock structure
@@ -125,10 +105,8 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
          * or wait for a previous writer to go away.
          */
         for (;;) {
-                struct __qrwlock *l = (struct __qrwlock *)lock;
-
-                if (!READ_ONCE(l->wmode) &&
-                    (cmpxchg_relaxed(&l->wmode, 0, _QW_WAITING) == 0))
+                if (!READ_ONCE(lock->wmode) &&
+                    (cmpxchg_relaxed(&lock->wmode, 0, _QW_WAITING) == 0))
                         break;
 
                 cpu_relax();
[tip:locking/core] locking/qrwlock: Use 'struct qrwlock' instead of 'struct __qrwlock'
Commit-ID: e0d02285f16e8d5810f3d5d5e8a5886ca0015d3b Gitweb: https://git.kernel.org/tip/e0d02285f16e8d5810f3d5d5e8a5886ca0015d3b Author: Will Deacon AuthorDate: Thu, 12 Oct 2017 13:20:47 +0100 Committer: Ingo Molnar CommitDate: Wed, 25 Oct 2017 10:57:24 +0200 locking/qrwlock: Use 'struct qrwlock' instead of 'struct __qrwlock' There's no good reason to keep the internal structure of struct qrwlock hidden from qrwlock.h, particularly as it's actually needed for unlock and ends up being abstracted independently behind the __qrwlock_write_byte() function. Stop pretending we can hide this stuff, and move the __qrwlock definition into qrwlock, removing the __qrwlock_write_byte() nastiness and using the same struct definition everywhere instead. Signed-off-by: Will Deacon Acked-by: Peter Zijlstra Cc: Boqun Feng Cc: jeremy.lin...@arm.com Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Thomas Gleixner Cc: Waiman Long Cc: linux-arm-ker...@lists.infradead.org Link: http://lkml.kernel.org/r/1507810851-306-2-git-send-email-will.dea...@arm.com Signed-off-by: Ingo Molnar --- include/asm-generic/qrwlock.h | 12 +--- include/asm-generic/qrwlock_types.h | 15 +-- kernel/locking/qrwlock.c| 26 ++ 3 files changed, 16 insertions(+), 37 deletions(-) diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h index 5092532..02c0a76 100644 --- a/include/asm-generic/qrwlock.h +++ b/include/asm-generic/qrwlock.h @@ -129,22 +129,12 @@ static inline void queued_read_unlock(struct qrwlock *lock) } /** - * __qrwlock_write_byte - retrieve the write byte address of a queue rwlock - * @lock : Pointer to queue rwlock structure - * Return: the write byte address of a queue rwlock - */ -static inline u8 *__qrwlock_write_byte(struct qrwlock *lock) -{ - return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN); -} - -/** * queued_write_unlock - release write lock of a queue rwlock * @lock : Pointer to queue rwlock structure */ static inline void queued_write_unlock(struct qrwlock *lock) { - smp_store_release(__qrwlock_write_byte(lock), 0); + smp_store_release(>wmode, 0); } /* diff --git a/include/asm-generic/qrwlock_types.h b/include/asm-generic/qrwlock_types.h index 0abc6b6..507f2dc 100644 --- a/include/asm-generic/qrwlock_types.h +++ b/include/asm-generic/qrwlock_types.h @@ -9,12 +9,23 @@ */ typedef struct qrwlock { - atomic_tcnts; + union { + atomic_t cnts; + struct { +#ifdef __LITTLE_ENDIAN + u8 wmode; /* Writer mode */ + u8 rcnts[3];/* Reader counts */ +#else + u8 rcnts[3];/* Reader counts */ + u8 wmode; /* Writer mode */ +#endif + }; + }; arch_spinlock_t wait_lock; } arch_rwlock_t; #define__ARCH_RW_LOCK_UNLOCKED { \ - .cnts = ATOMIC_INIT(0), \ + { .cnts = ATOMIC_INIT(0), },\ .wait_lock = __ARCH_SPIN_LOCK_UNLOCKED, \ } diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c index 2655f26..1af791e 100644 --- a/kernel/locking/qrwlock.c +++ b/kernel/locking/qrwlock.c @@ -23,26 +23,6 @@ #include #include -/* - * This internal data structure is used for optimizing access to some of - * the subfields within the atomic_t cnts. 
- */ -struct __qrwlock { - union { - atomic_t cnts; - struct { -#ifdef __LITTLE_ENDIAN - u8 wmode; /* Writer mode */ - u8 rcnts[3];/* Reader counts */ -#else - u8 rcnts[3];/* Reader counts */ - u8 wmode; /* Writer mode */ -#endif - }; - }; - arch_spinlock_t lock; -}; - /** * rspin_until_writer_unlock - inc reader count & spin until writer is gone * @lock : Pointer to queue rwlock structure @@ -125,10 +105,8 @@ void queued_write_lock_slowpath(struct qrwlock *lock) * or wait for a previous writer to go away. */ for (;;) { - struct __qrwlock *l = (struct __qrwlock *)lock; - - if (!READ_ONCE(l->wmode) && - (cmpxchg_relaxed(>wmode, 0, _QW_WAITING) == 0)) + if (!READ_ONCE(lock->wmode) && + (cmpxchg_relaxed(>wmode, 0, _QW_WAITING) == 0)) break; cpu_relax();
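A self-contained user-space sketch of the layout trick this patch exposes (illustrative only, C11 atomics, little-endian member order; the kernel header also handles big-endian): overlaying a byte view on the 32-bit count lets write-unlock clear just the writer byte with a release store, which is what queued_write_unlock() does via smp_store_release(&lock->wmode, 0) once the '&lock->' stripped by the archive is restored.

#include <stdatomic.h>
#include <stdint.h>

struct qrwlock_demo {
	union {
		_Atomic uint32_t cnts;		/* whole word: readers + writer */
		struct {
			_Atomic uint8_t wmode;	/* writer mode, low byte on LE */
			uint8_t rcnts[3];	/* reader counts */
		};
	};
};

static void write_unlock_demo(struct qrwlock_demo *lock)
{
	/* mirrors queued_write_unlock(): byte-sized release store */
	atomic_store_explicit(&lock->wmode, 0, memory_order_release);
}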
[tip:locking/core] locking/barriers: Kill lockless_dereference()
Commit-ID: 59ecbbe7b31cd2d86ff9a9f461a00f7e7533aedc Gitweb: https://git.kernel.org/tip/59ecbbe7b31cd2d86ff9a9f461a00f7e7533aedc Author: Will Deacon AuthorDate: Tue, 24 Oct 2017 11:22:49 +0100 Committer: Ingo Molnar CommitDate: Tue, 24 Oct 2017 13:17:33 +0200 locking/barriers: Kill lockless_dereference() lockless_dereference() is a nice idea, but it gained little traction in kernel code since its introduction three years ago. This is partly because it's a pain to type, but also because using READ_ONCE() instead has worked correctly on all architectures apart from Alpha, which is a fully supported but somewhat niche architecture these days. Now that READ_ONCE() has been upgraded to contain an implicit smp_read_barrier_depends() and the few callers of lockless_dereference() have been converted, we can remove lockless_dereference() altogether. Signed-off-by: Will Deacon Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1508840570-22169-5-git-send-email-will.dea...@arm.com Signed-off-by: Ingo Molnar --- Documentation/memory-barriers.txt| 12 Documentation/translations/ko_KR/memory-barriers.txt | 12 include/linux/compiler.h | 20 3 files changed, 44 deletions(-) diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt index b759a60..470a682 100644 --- a/Documentation/memory-barriers.txt +++ b/Documentation/memory-barriers.txt @@ -1886,18 +1886,6 @@ There are some more advanced barrier functions: See Documentation/atomic_{t,bitops}.txt for more information. - (*) lockless_dereference(); - - This can be thought of as a pointer-fetch wrapper around the - smp_read_barrier_depends() data-dependency barrier. - - This is also similar to rcu_dereference(), but in cases where - object lifetime is handled by some mechanism other than RCU, for - example, when the objects removed only when the system goes down. - In addition, lockless_dereference() is used in some data structures - that can be used both with and without RCU. - - (*) dma_wmb(); (*) dma_rmb(); diff --git a/Documentation/translations/ko_KR/memory-barriers.txt b/Documentation/translations/ko_KR/memory-barriers.txt index a7a8132..ec3b46e 100644 --- a/Documentation/translations/ko_KR/memory-barriers.txt +++ b/Documentation/translations/ko_KR/memory-barriers.txt @@ -1858,18 +1858,6 @@ Mandatory 배리어들은 SMP 시스템에서도 UP 시스템에서도 SMP 효 참고하세요. - (*) lockless_dereference(); - - 이 함수는 smp_read_barrier_depends() 데이터 의존성 배리어를 사용하는 - 포인터 읽어오기 래퍼(wrapper) 함수로 생각될 수 있습니다. - - 객체의 라이프타임이 RCU 외의 메커니즘으로 관리된다는 점을 제외하면 - rcu_dereference() 와도 유사한데, 예를 들면 객체가 시스템이 꺼질 때에만 - 제거되는 경우 등입니다. 또한, lockless_dereference() 은 RCU 와 함께 - 사용될수도, RCU 없이 사용될 수도 있는 일부 데이터 구조에 사용되고 - 있습니다. - - (*) dma_wmb(); (*) dma_rmb(); diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 7d7b77d..5a1cab4 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -346,24 +346,4 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s (volatile typeof(x) *)&(x); }) #define ACCESS_ONCE(x) (*__ACCESS_ONCE(x)) -/** - * lockless_dereference() - safely load a pointer for later dereference - * @p: The pointer to load - * - * Similar to rcu_dereference(), but for situations where the pointed-to - * object's lifetime is managed by something other than RCU. That - * "something other" might be reference counting or simple immortality. 
- * - * The seemingly unused variable ___typecheck_p validates that @p is - * indeed a pointer type by using a pointer to typeof(*p) as the type. - * Taking a pointer to typeof(*p) again is needed in case p is void *. - */ -#define lockless_dereference(p) \ -({ \ - typeof(p) _p1 = READ_ONCE(p); \ - typeof(*(p)) *___typecheck_p __maybe_unused; \ - smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ - (_p1); \ -}) - #endif /* __LINUX_COMPILER_H */
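The pattern lockless_dereference() served survives unchanged, just spelled READ_ONCE(). A minimal sketch with made-up names (struct foo, gbl_foo):

struct foo {
	int a;
};

static struct foo *gbl_foo;

static void publish(struct foo *f)
{
	f->a = 42;
	/* order the initialisation before the pointer becomes visible */
	smp_store_release(&gbl_foo, f);
}

static int consume(void)
{
	/* READ_ONCE() now heads the dependency chain on every architecture */
	struct foo *f = READ_ONCE(gbl_foo);

	return f ? f->a : -1;
}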
[tip:locking/core] locking/atomics/alpha: Add smp_read_barrier_depends() to _release()/_relaxed() atomics
Commit-ID: 5a8897cc7631fa544d079c443800f4420d1b173f Gitweb: https://git.kernel.org/tip/5a8897cc7631fa544d079c443800f4420d1b173f Author: Will Deacon AuthorDate: Tue, 24 Oct 2017 11:22:50 +0100 Committer: Ingo Molnar CommitDate: Tue, 24 Oct 2017 13:17:34 +0200 locking/atomics/alpha: Add smp_read_barrier_depends() to _release()/_relaxed() atomics As part of the fight against smp_read_barrier_depends(), we require dependency ordering to be preserved when a dependency is headed by a load performed using an atomic operation. This patch adds smp_read_barrier_depends() to the _release() and _relaxed() atomics on alpha, which otherwise lack anything to enforce dependency ordering. Signed-off-by: Will Deacon Signed-off-by: Paul E. McKenney Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1508840570-22169-6-git-send-email-will.dea...@arm.com Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/atomic.h | 13 + 1 file changed, 13 insertions(+) diff --git a/arch/alpha/include/asm/atomic.h b/arch/alpha/include/asm/atomic.h index 498933a..16961a3 100644 --- a/arch/alpha/include/asm/atomic.h +++ b/arch/alpha/include/asm/atomic.h @@ -13,6 +13,15 @@ * than regular operations. */ +/* + * To ensure dependency ordering is preserved for the _relaxed and + * _release atomics, an smp_read_barrier_depends() is unconditionally + * inserted into the _relaxed variants, which are used to build the + * barriered versions. To avoid redundant back-to-back fences, we can + * define the _acquire and _fence versions explicitly. + */ +#define __atomic_op_acquire(op, args...) op##_relaxed(args) +#define __atomic_op_fence __atomic_op_release #define ATOMIC_INIT(i) { (i) } #define ATOMIC64_INIT(i) { (i) } @@ -60,6 +69,7 @@ static inline int atomic_##op##_return_relaxed(int i, atomic_t *v)\ ".previous" \ :"=" (temp), "=m" (v->counter), "=" (result)\ :"Ir" (i), "m" (v->counter) : "memory");\ + smp_read_barrier_depends(); \ return result; \ } @@ -77,6 +87,7 @@ static inline int atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ ".previous" \ :"=" (temp), "=m" (v->counter), "=" (result)\ :"Ir" (i), "m" (v->counter) : "memory");\ + smp_read_barrier_depends(); \ return result; \ } @@ -111,6 +122,7 @@ static __inline__ long atomic64_##op##_return_relaxed(long i, atomic64_t * v) \ ".previous" \ :"=" (temp), "=m" (v->counter), "=" (result)\ :"Ir" (i), "m" (v->counter) : "memory");\ + smp_read_barrier_depends(); \ return result; \ } @@ -128,6 +140,7 @@ static __inline__ long atomic64_fetch_##op##_relaxed(long i, atomic64_t * v)\ ".previous" \ :"=" (temp), "=m" (v->counter), "=" (result)\ :"Ir" (i), "m" (v->counter) : "memory");\ + smp_read_barrier_depends(); \ return result; \ }
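To see why the two #defines are sufficient, compare with the generic builders in include/linux/atomic.h (simplified sketch from memory, shown for orientation): since every Alpha _relaxed op now ends with smp_read_barrier_depends(), which is a full mb on Alpha, the _acquire form needs nothing beyond the _relaxed op itself, and the full-fence form degenerates to the _release builder.

/* generic builders, simplified: */
#define __atomic_op_acquire(op, args...)				\
({									\
	typeof(op##_relaxed(args)) __ret = op##_relaxed(args);		\
	smp_mb__after_atomic();		/* redundant on Alpha now */	\
	__ret;								\
})

#define __atomic_op_release(op, args...)				\
({									\
	smp_mb__before_atomic();					\
	op##_relaxed(args);	/* already ends with a barrier */	\
})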
[tip:locking/core] locking/barriers: Convert users of lockless_dereference() to READ_ONCE()
Commit-ID: 506458efaf153c1ea480591c5602a5a3ba5a3b76 Gitweb: https://git.kernel.org/tip/506458efaf153c1ea480591c5602a5a3ba5a3b76 Author: Will Deacon AuthorDate: Tue, 24 Oct 2017 11:22:48 +0100 Committer: Ingo Molnar CommitDate: Tue, 24 Oct 2017 13:17:33 +0200 locking/barriers: Convert users of lockless_dereference() to READ_ONCE() READ_ONCE() now has an implicit smp_read_barrier_depends() call, so it can be used instead of lockless_dereference() without any change in semantics. Signed-off-by: Will Deacon Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1508840570-22169-4-git-send-email-will.dea...@arm.com Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 2 +- arch/x86/include/asm/mmu_context.h | 4 ++-- arch/x86/kernel/ldt.c | 2 +- drivers/md/dm-mpath.c | 20 ++-- fs/dcache.c| 4 ++-- fs/overlayfs/ovl_entry.h | 2 +- fs/overlayfs/readdir.c | 2 +- include/linux/rculist.h| 4 ++-- include/linux/rcupdate.h | 4 ++-- kernel/events/core.c | 4 ++-- kernel/seccomp.c | 2 +- kernel/task_work.c | 2 +- mm/slab.h | 2 +- 13 files changed, 27 insertions(+), 27 deletions(-) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 80534d3..589af1e 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2371,7 +2371,7 @@ static unsigned long get_segment_base(unsigned int segment) struct ldt_struct *ldt; /* IRQs are off, so this synchronizes with smp_store_release */ - ldt = lockless_dereference(current->active_mm->context.ldt); + ldt = READ_ONCE(current->active_mm->context.ldt); if (!ldt || idx >= ldt->nr_entries) return 0; diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 3c856a1..efc5306 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -72,8 +72,8 @@ static inline void load_mm_ldt(struct mm_struct *mm) #ifdef CONFIG_MODIFY_LDT_SYSCALL struct ldt_struct *ldt; - /* lockless_dereference synchronizes with smp_store_release */ - ldt = lockless_dereference(mm->context.ldt); + /* READ_ONCE synchronizes with smp_store_release */ + ldt = READ_ONCE(mm->context.ldt); /* * Any change to mm->context.ldt is followed by an IPI to all diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c index f0e64db..0a21390 100644 --- a/arch/x86/kernel/ldt.c +++ b/arch/x86/kernel/ldt.c @@ -101,7 +101,7 @@ static void finalize_ldt_struct(struct ldt_struct *ldt) static void install_ldt(struct mm_struct *current_mm, struct ldt_struct *ldt) { - /* Synchronizes with lockless_dereference in load_mm_ldt. */ + /* Synchronizes with READ_ONCE in load_mm_ldt. */ smp_store_release(_mm->context.ldt, ldt); /* Activate the LDT for all CPUs using current_mm. */ diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 11f273d..3f88c9d 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -366,7 +366,7 @@ static struct pgpath *choose_path_in_pg(struct multipath *m, pgpath = path_to_pgpath(path); - if (unlikely(lockless_dereference(m->current_pg) != pg)) { + if (unlikely(READ_ONCE(m->current_pg) != pg)) { /* Only update current_pgpath if pg changed */ spin_lock_irqsave(>lock, flags); m->current_pgpath = pgpath; @@ -390,7 +390,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes) } /* Were we instructed to switch PG? 
*/ - if (lockless_dereference(m->next_pg)) { + if (READ_ONCE(m->next_pg)) { spin_lock_irqsave(>lock, flags); pg = m->next_pg; if (!pg) { @@ -406,7 +406,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes) /* Don't change PG until it has no remaining paths */ check_current_pg: - pg = lockless_dereference(m->current_pg); + pg = READ_ONCE(m->current_pg); if (pg) { pgpath = choose_path_in_pg(m, pg, nr_bytes); if (!IS_ERR_OR_NULL(pgpath)) @@ -473,7 +473,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, struct request *clone; /* Do we need to select a new pgpath? */ - pgpath = lockless_dereference(m->current_pgpath); + pgpath = READ_ONCE(m->current_pgpath); if (!pgpath || !test_bit(MPATHF_QUEUE_IO, >flags)) pgpath = choose_pgpath(m, nr_bytes); @@ -535,7 +535,7 @@ static int __multipath_map_bio(struct multipath *m, struct bio *bio, struct dm_m bool queue_io;
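The conversion is mechanical because the writer side already publishes with smp_store_release(); recapping the x86 LDT hunks above, with the '&current_mm' lost to the archive's '&'-stripping restored:

/* writer (install_ldt(), unchanged by this patch): */
smp_store_release(&current_mm->context.ldt, ldt);

/* reader, before: */
ldt = lockless_dereference(current->active_mm->context.ldt);

/* reader, after (identical ordering guarantees): */
ldt = READ_ONCE(current->active_mm->context.ldt);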
[tip:locking/core] locking/barriers: Add implicit smp_read_barrier_depends() to READ_ONCE()
Commit-ID: 76ebbe78f7390aee075a7f3768af197ded1bdfbb Gitweb: https://git.kernel.org/tip/76ebbe78f7390aee075a7f3768af197ded1bdfbb Author: Will Deacon AuthorDate: Tue, 24 Oct 2017 11:22:47 +0100 Committer: Ingo Molnar CommitDate: Tue, 24 Oct 2017 13:17:32 +0200 locking/barriers: Add implicit smp_read_barrier_depends() to READ_ONCE() In preparation for the removal of lockless_dereference(), which is the same as READ_ONCE() on all architectures other than Alpha, add an implicit smp_read_barrier_depends() to READ_ONCE() so that it can be used to head dependency chains on all architectures. Signed-off-by: Will Deacon Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1508840570-22169-3-git-send-email-will.dea...@arm.com Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 0808318..7d7b77d 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -242,6 +242,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s __read_once_size(&(x), __u.__c, sizeof(x)); \ else\ __read_once_size_nocheck(&(x), __u.__c, sizeof(x)); \ + smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \ __u.__val; \ }) #define READ_ONCE(x) __READ_ONCE(x, 1)
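Why the barrier belongs inside READ_ONCE(): on Alpha, the dependent load in a pointer chase may otherwise be satisfied before the load of the pointer itself, so a reader could see the new pointer but stale pointee contents. A sketch with illustrative names (struct node, head):

struct node {
	int x;
};

static struct node *head;

static int reader(void)
{
	/*
	 * The implicit smp_read_barrier_depends() orders the load of
	 * 'head' before the dependent load of 'p->x' below.
	 */
	struct node *p = READ_ONCE(head);

	return p ? p->x : 0;
}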
[tip:locking/core] linux/compiler.h: Split into compiler.h and compiler_types.h
Commit-ID: d15155824c5014803d91b829736d249c500bdda6 Gitweb: https://git.kernel.org/tip/d15155824c5014803d91b829736d249c500bdda6 Author: Will Deacon AuthorDate: Tue, 24 Oct 2017 11:22:46 +0100 Committer: Ingo Molnar CommitDate: Tue, 24 Oct 2017 13:17:32 +0200 linux/compiler.h: Split into compiler.h and compiler_types.h linux/compiler.h is included indirectly by linux/types.h via uapi/linux/types.h -> uapi/linux/posix_types.h -> linux/stddef.h -> uapi/linux/stddef.h and is needed to provide a proper definition of offsetof. Unfortunately, compiler.h requires a definition of smp_read_barrier_depends() for defining lockless_dereference() and soon for defining READ_ONCE(), which means that all users of READ_ONCE() will need to include asm/barrier.h to avoid splats such as: In file included from include/uapi/linux/stddef.h:1:0, from include/linux/stddef.h:4, from arch/h8300/kernel/asm-offsets.c:11: include/linux/list.h: In function 'list_empty': >> include/linux/compiler.h:343:2: error: implicit declaration of function >> 'smp_read_barrier_depends' [-Werror=implicit-function-declaration] smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \ ^ A better alternative is to include asm/barrier.h in linux/compiler.h, but this requires a type definition for "bool" on some architectures (e.g. x86), which is defined later by linux/types.h. Type "bool" is also used directly in linux/compiler.h, so the whole thing is pretty fragile. This patch splits compiler.h in two: compiler_types.h contains type annotations, definitions and the compiler-specific parts, whereas compiler.h #includes compiler-types.h and additionally defines macros such as {READ,WRITE.ACCESS}_ONCE(). uapi/linux/stddef.h and linux/linkage.h are then moved over to include linux/compiler_types.h, which fixes the build for h8 and blackfin. Signed-off-by: Will Deacon Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1508840570-22169-2-git-send-email-will.dea...@arm.com Signed-off-by: Ingo Molnar --- arch/arm/include/asm/ptrace.h | 3 +- arch/sparc/include/asm/ptrace.h | 1 + arch/um/include/shared/init.h | 2 +- include/linux/compiler-clang.h | 2 +- include/linux/compiler-gcc.h| 2 +- include/linux/compiler-intel.h | 2 +- include/linux/compiler.h| 265 +- include/linux/compiler_types.h | 274 include/linux/linkage.h | 2 +- include/uapi/linux/stddef.h | 2 +- scripts/headers_install.sh | 2 +- 11 files changed, 286 insertions(+), 271 deletions(-) diff --git a/arch/arm/include/asm/ptrace.h b/arch/arm/include/asm/ptrace.h index e9c9a11..c7cdbb4 100644 --- a/arch/arm/include/asm/ptrace.h +++ b/arch/arm/include/asm/ptrace.h @@ -126,8 +126,7 @@ extern unsigned long profile_pc(struct pt_regs *regs); /* * kprobe-based event tracer support */ -#include -#include +#include #define MAX_REG_OFFSET (offsetof(struct pt_regs, ARM_ORIG_r0)) extern int regs_query_register_offset(const char *name); diff --git a/arch/sparc/include/asm/ptrace.h b/arch/sparc/include/asm/ptrace.h index d73428e..b383484 100644 --- a/arch/sparc/include/asm/ptrace.h +++ b/arch/sparc/include/asm/ptrace.h @@ -6,6 +6,7 @@ #if defined(__sparc__) && defined(__arch64__) #ifndef __ASSEMBLY__ +#include #include #include diff --git a/arch/um/include/shared/init.h b/arch/um/include/shared/init.h index 233e259..094e96c 100644 --- a/arch/um/include/shared/init.h +++ b/arch/um/include/shared/init.h @@ -40,7 +40,7 @@ typedef int (*initcall_t)(void); typedef void (*exitcall_t)(void); -#include +#include /* These are for everybody (although not all archs will actually discard it in modules) */ diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index de17999..5947a3e 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -1,4 +1,4 @@ -#ifndef __LINUX_COMPILER_H +#ifndef __LINUX_COMPILER_TYPES_H #error "Please don't include directly, include instead." #endif diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 16d41de..ce8e965 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -1,4 +1,4 @@ -#ifndef __LINUX_COMPILER_H +#ifndef __LINUX_COMPILER_TYPES_H #error "Please don't include directly, include instead." #endif diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h index d4c7113..e438ac89 100644 --- a/include/linux/compiler-intel.h +++ b/include/linux/compiler-intel.h @@ -1,4 +1,4 @@ -#ifndef __LINUX_COMPILER_H +#ifndef __LINUX_COMPILER_TYPES_H #error "Please don't include directly, include instead." #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index e95a263..0808318 100644 --- a/include/linux/compiler.h +++
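The include discipline the split enforces, in shape form (abridged; the <linux/...> file names inside the #error strings, stripped by the archive, are restored): compiler backends are reachable only via linux/compiler_types.h, and each backend verifies that with the new guard.

/* include/linux/compiler_types.h */
#ifndef __LINUX_COMPILER_TYPES_H
#define __LINUX_COMPILER_TYPES_H

/* type annotations, attributes, compiler-specific includes ... */

#endif /* __LINUX_COMPILER_TYPES_H */

/* include/linux/compiler-gcc.h */
#ifndef __LINUX_COMPILER_TYPES_H
#error "Please don't include <linux/compiler-gcc.h> directly, include <linux/compiler.h> instead."
#endif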
[tip:locking/core] locking/arch, powerpc/rtas: Use arch_spin_lock() instead of arch_spin_lock_flags()
Commit-ID: 58788a9b6060890e481c8111fac43d065560ebcb Gitweb: https://git.kernel.org/tip/58788a9b6060890e481c8111fac43d065560ebcb Author: Will Deacon AuthorDate: Wed, 18 Oct 2017 12:51:09 +0100 Committer: Ingo Molnar CommitDate: Wed, 18 Oct 2017 15:15:07 +0200 locking/arch, powerpc/rtas: Use arch_spin_lock() instead of arch_spin_lock_flags() arch_spin_lock_flags() is an internal part of the spinlock implementation and is no longer available when SMP=n and DEBUG_SPINLOCK=y, so the PPC RTAS code fails to compile in this configuration: arch/powerpc/kernel/rtas.c: In function 'lock_rtas': >> arch/powerpc/kernel/rtas.c:81:2: error: implicit declaration of function >> 'arch_spin_lock_flags' [-Werror=implicit-function-declaration] arch_spin_lock_flags(, flags); ^~~~ Since there's no good reason to use arch_spin_lock_flags() here (the code in question already calls local_irq_save(flags)), switch it over to arch_spin_lock and get things building again. Reported-by: kbuild test robot Signed-off-by: Will Deacon Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1508327469-20231-1-git-send-email-will.dea...@arm.com Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/rtas.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 1643e9e..3f1c4fc 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -78,7 +78,7 @@ static unsigned long lock_rtas(void) local_irq_save(flags); preempt_disable(); - arch_spin_lock_flags(, flags); + arch_spin_lock(); return flags; }
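The resulting function, with the '&rtas.lock' argument that the archive's '&'-stripping dropped restored (reconstructed from arch/powerpc/kernel/rtas.c): interrupts are already disabled, so the plain lock is sufficient and the build no longer needs the internal _flags variant.

static unsigned long lock_rtas(void)
{
	unsigned long flags;

	local_irq_save(flags);
	preempt_disable();
	arch_spin_lock(&rtas.lock); /* was: arch_spin_lock_flags(&rtas.lock, flags) */
	return flags;
}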
[tip:locking/core] locking/arch: Remove dummy arch_{read,spin,write}_lock_flags() implementations
Commit-ID: a4c1887d4c1462b0ec5a8989f8ba3cdd9057a299 Gitweb: https://git.kernel.org/tip/a4c1887d4c1462b0ec5a8989f8ba3cdd9057a299 Author: Will Deacon AuthorDate: Tue, 3 Oct 2017 19:25:29 +0100 Committer: Ingo Molnar CommitDate: Tue, 10 Oct 2017 11:50:19 +0200 locking/arch: Remove dummy arch_{read,spin,write}_lock_flags() implementations The arch_{read,spin,write}_lock_flags() macros are simply mapped to the non-flags versions by the majority of architectures, so do this in core code and remove the dummy implementations. Also remove the implementation in spinlock_up.h, since all callers of do_raw_spin_lock_flags() call local_irq_save(flags) anyway. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: paul...@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1507055129-12300-4-git-send-email-will.dea...@arm.com Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/spinlock.h| 4 arch/arc/include/asm/spinlock.h | 4 arch/arm/include/asm/spinlock.h | 5 - arch/arm64/include/asm/spinlock.h| 5 - arch/blackfin/include/asm/spinlock.h | 6 -- arch/hexagon/include/asm/spinlock.h | 5 - arch/ia64/include/asm/spinlock.h | 5 +++-- arch/m32r/include/asm/spinlock.h | 4 arch/metag/include/asm/spinlock.h| 5 - arch/metag/include/asm/spinlock_lnkget.h | 3 --- arch/mips/include/asm/spinlock.h | 3 --- arch/mn10300/include/asm/spinlock.h | 4 +--- arch/parisc/include/asm/spinlock.h | 4 +--- arch/powerpc/include/asm/spinlock.h | 4 +--- arch/s390/include/asm/spinlock.h | 4 +--- arch/sh/include/asm/spinlock-cas.h | 4 arch/sh/include/asm/spinlock-llsc.h | 4 arch/sparc/include/asm/spinlock_32.h | 4 arch/sparc/include/asm/spinlock_64.h | 3 --- arch/tile/include/asm/spinlock_32.h | 6 -- arch/tile/include/asm/spinlock_64.h | 6 -- arch/x86/include/asm/spinlock.h | 3 --- arch/xtensa/include/asm/spinlock.h | 5 - include/asm-generic/qspinlock.h | 1 - include/linux/rwlock.h | 9 + include/linux/spinlock.h | 4 include/linux/spinlock_up.h | 8 27 files changed, 20 insertions(+), 102 deletions(-) diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h index 7bff6316..3e2b4a0 100644 --- a/arch/alpha/include/asm/spinlock.h +++ b/arch/alpha/include/asm/spinlock.h @@ -13,7 +13,6 @@ * We make no fairness assumptions. They have a cost. 
*/ -#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) #define arch_spin_is_locked(x) ((x)->lock != 0) static inline int arch_spin_value_unlocked(arch_spinlock_t lock) @@ -160,7 +159,4 @@ static inline void arch_write_unlock(arch_rwlock_t * lock) lock->lock = 0; } -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - #endif /* _ALPHA_SPINLOCK_H */ diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index f85bb58..2ba04a7 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -14,7 +14,6 @@ #include #define arch_spin_is_locked(x) ((x)->slock != __ARCH_SPIN_LOCK_UNLOCKED__) -#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) #ifdef CONFIG_ARC_HAS_LLSC @@ -410,7 +409,4 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) #endif -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index d40a28f..daa8721 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -52,8 +52,6 @@ static inline void dsb_sev(void) * memory. */ -#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) - static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned long tmp; @@ -270,7 +268,4 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) } } -#define arch_read_lock_flags(lock, flags) arch_read_lock(lock) -#define arch_write_lock_flags(lock, flags) arch_write_lock(lock) - #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 1504f2b..aa51a38 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -27,8 +27,6 @@ * instructions. */ -#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) - static inline void arch_spin_lock(arch_spinlock_t *lock) { unsigned int tmp; @@ -303,9 +301,6 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) /* read_can_lock - would read_trylock() succeed? */ #define arch_read_can_lock(x)
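After this patch the mapping lives once in core code; roughly, a sketch of the include/linux/spinlock.h and include/linux/rwlock.h side of the diff:

#ifndef arch_spin_lock_flags
#define arch_spin_lock_flags(lock, flags)	arch_spin_lock(lock)
#endif

#ifndef arch_read_lock_flags
#define arch_read_lock_flags(lock, flags)	arch_read_lock(lock)
#endif

#ifndef arch_write_lock_flags
#define arch_write_lock_flags(lock, flags)	arch_write_lock(lock)
#endif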
[tip:locking/core] locking/arch: Remove dummy arch_{read,spin,write}_relax() implementations
Commit-ID: 0160fb177d484367e041ac251fca591a3e49660c Gitweb: https://git.kernel.org/tip/0160fb177d484367e041ac251fca591a3e49660c Author: Will Deacon AuthorDate: Tue, 3 Oct 2017 19:25:28 +0100 Committer: Ingo Molnar CommitDate: Tue, 10 Oct 2017 11:50:18 +0200 locking/arch: Remove dummy arch_{read,spin,write}_relax() implementations arch_{read,spin,write}_relax() are defined as cpu_relax() by the core code, so architectures that can't do better (i.e. most of them) don't need to bother with the dummy definitions. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: paul...@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1507055129-12300-3-git-send-email-will.dea...@arm.com Signed-off-by: Ingo Molnar --- arch/arc/include/asm/spinlock.h | 4 arch/arm/include/asm/spinlock.h | 4 arch/arm64/include/asm/spinlock.h| 4 arch/blackfin/include/asm/spinlock.h | 4 arch/ia64/include/asm/spinlock.h | 4 arch/m32r/include/asm/spinlock.h | 4 arch/metag/include/asm/spinlock.h| 4 arch/metag/include/asm/spinlock_lnkget.h | 4 arch/mips/include/asm/spinlock.h | 4 arch/s390/include/asm/spinlock.h | 3 +++ arch/sh/include/asm/spinlock-cas.h | 4 arch/sh/include/asm/spinlock-llsc.h | 4 arch/sparc/include/asm/spinlock_32.h | 4 arch/sparc/include/asm/spinlock_64.h | 4 arch/x86/include/asm/spinlock.h | 4 15 files changed, 3 insertions(+), 56 deletions(-) diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index ce9bfcf..f85bb58 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -413,8 +413,4 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() - #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index f522326..d40a28f 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -273,8 +273,4 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() - #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 95ad710..1504f2b 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -306,10 +306,6 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() - /* See include/linux/spinlock.h */ #define smp_mb__after_spinlock() smp_mb() diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h index 607ef98..3885d12 100644 --- a/arch/blackfin/include/asm/spinlock.h +++ b/arch/blackfin/include/asm/spinlock.h @@ -82,10 +82,6 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) __raw_write_unlock_asm(>lock); } -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) 
cpu_relax() - #endif #endif /* !__BFIN_SPINLOCK_H */ diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h index c728dda..ed1e6212 100644 --- a/arch/ia64/include/asm/spinlock.h +++ b/arch/ia64/include/asm/spinlock.h @@ -269,8 +269,4 @@ static inline int arch_read_trylock(arch_rwlock_t *x) return (u32)ia64_cmpxchg4_acq((__u32 *)(x), new.word, old.word) == old.word; } -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() - #endif /* _ASM_IA64_SPINLOCK_H */ diff --git a/arch/m32r/include/asm/spinlock.h b/arch/m32r/include/asm/spinlock.h index 0026013..6809a9b 100644 --- a/arch/m32r/include/asm/spinlock.h +++ b/arch/m32r/include/asm/spinlock.h @@ -308,8 +308,4 @@ static inline int arch_write_trylock(arch_rwlock_t *lock) #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) -#define arch_spin_relax(lock) cpu_relax() -#define arch_read_relax(lock) cpu_relax() -#define arch_write_relax(lock) cpu_relax() - #endif /* _ASM_M32R_SPINLOCK_H */ diff --git
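For context, the core-code fallbacks that make these per-architecture copies redundant follow the usual override pattern sketched below (illustrative only, not the exact hunk from the companion core patch):

    #ifndef arch_spin_relax
    # define arch_spin_relax(l)     cpu_relax()
    #endif
    #ifndef arch_read_relax
    # define arch_read_relax(l)     cpu_relax()
    #endif
    #ifndef arch_write_relax
    # define arch_write_relax(l)    cpu_relax()
    #endif

An architecture with a smarter relax primitive simply defines the macro itself and keeps its own version, which is why s390 gains three lines in the diffstat while everyone else loses four.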
[tip:locking/core] locking/rwsem, security/apparmor: Replace homebrew use of write_can_lock() with lockdep
Commit-ID: 26c4eb192c6224e5297496cead36404b62fb071b Gitweb: https://git.kernel.org/tip/26c4eb192c6224e5297496cead36404b62fb071b Author: Will Deacon AuthorDate: Tue, 3 Oct 2017 19:25:26 +0100 Committer: Ingo Molnar CommitDate: Tue, 10 Oct 2017 11:50:17 +0200 locking/rwsem, security/apparmor: Replace homebrew use of write_can_lock() with lockdep The lockdep subsystem provides a robust way to assert that a lock is held, so use that instead of write_can_lock, which can give incorrect results for qrwlocks. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Acked-by: John Johansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: paul...@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1507055129-12300-1-git-send-email-will.dea...@arm.com Signed-off-by: Ingo Molnar --- security/apparmor/include/lib.h | 11 --- security/apparmor/label.c | 8 2 files changed, 4 insertions(+), 15 deletions(-) diff --git a/security/apparmor/include/lib.h b/security/apparmor/include/lib.h index 436b3a7..f546707 100644 --- a/security/apparmor/include/lib.h +++ b/security/apparmor/include/lib.h @@ -19,17 +19,6 @@ #include "match.h" -/* Provide our own test for whether a write lock is held for asserts - * this is because on none SMP systems write_can_lock will always - * resolve to true, which is what you want for code making decisions - * based on it, but wrong for asserts checking that the lock is held - */ -#ifdef CONFIG_SMP -#define write_is_locked(X) !write_can_lock(X) -#else -#define write_is_locked(X) (1) -#endif /* CONFIG_SMP */ - /* * DEBUG remains global (no per profile flag) since it is mostly used in sysctl * which is not related to profile accesses. diff --git a/security/apparmor/label.c b/security/apparmor/label.c index c5b99b9..ad28e03 100644 --- a/security/apparmor/label.c +++ b/security/apparmor/label.c @@ -80,7 +80,7 @@ void __aa_proxy_redirect(struct aa_label *orig, struct aa_label *new) AA_BUG(!orig); AA_BUG(!new); - AA_BUG(!write_is_locked(_set(orig)->lock)); + lockdep_assert_held_exclusive(_set(orig)->lock); tmp = rcu_dereference_protected(orig->proxy->label, _ns(orig)->lock); @@ -571,7 +571,7 @@ static bool __label_remove(struct aa_label *label, struct aa_label *new) AA_BUG(!ls); AA_BUG(!label); - AA_BUG(!write_is_locked(>lock)); + lockdep_assert_held_exclusive(>lock); if (new) __aa_proxy_redirect(label, new); @@ -608,7 +608,7 @@ static bool __label_replace(struct aa_label *old, struct aa_label *new) AA_BUG(!ls); AA_BUG(!old); AA_BUG(!new); - AA_BUG(!write_is_locked(>lock)); + lockdep_assert_held_exclusive(>lock); AA_BUG(new->flags & FLAG_IN_TREE); if (!label_is_stale(old)) @@ -645,7 +645,7 @@ static struct aa_label *__label_insert(struct aa_labelset *ls, AA_BUG(!ls); AA_BUG(!label); AA_BUG(labels_set(label) != ls); - AA_BUG(!write_is_locked(>lock)); + lockdep_assert_held_exclusive(>lock); AA_BUG(label->flags & FLAG_IN_TREE); /* Figure out where to put new node */
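The same replacement works anywhere an assert was being faked with lock-state probes. A minimal sketch (demo_lock and demo_update are made-up names; lockdep_assert_held_exclusive() is the interface the patch switches to):

    static DEFINE_RWLOCK(demo_lock);

    static void demo_update(void)
    {
        /* Compiles away without lockdep; with it, warns unless the
         * caller holds demo_lock for writing, on SMP and UP alike. */
        lockdep_assert_held_exclusive(&demo_lock);
        /* ... modify data protected by demo_lock ... */
    }

Unlike the write_is_locked() hack, this also catches the case where some other context holds the lock, since lockdep tracks ownership by the current task rather than inspecting the lock word.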
[tip:locking/core] locking/core: Remove {read,spin,write}_can_lock()
Commit-ID: a8a217c22116eff6c120d753c9934089fb229af0 Gitweb: https://git.kernel.org/tip/a8a217c22116eff6c120d753c9934089fb229af0 Author: Will Deacon AuthorDate: Tue, 3 Oct 2017 19:25:27 +0100 Committer: Ingo Molnar CommitDate: Tue, 10 Oct 2017 11:50:18 +0200 locking/core: Remove {read,spin,write}_can_lock() Outside of the locking code itself, {read,spin,write}_can_lock() have no users in tree. Apparmor (the last remaining user of write_can_lock()) got moved over to lockdep by the previous patch. This patch removes the use of {read,spin,write}_can_lock() from the BUILD_LOCK_OPS macro, deferring to the trylock operation for testing the lock status, and subsequently removes the unused macros altogether. They aren't guaranteed to work in a concurrent environment and can give incorrect results in the case of qrwlock. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: paul...@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1507055129-12300-2-git-send-email-will.dea...@arm.com Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/spinlock.h| 10 -- arch/arc/include/asm/spinlock.h | 3 --- arch/arm/include/asm/spinlock.h | 6 -- arch/blackfin/include/asm/spinlock.h | 10 -- arch/hexagon/include/asm/spinlock.h | 10 -- arch/ia64/include/asm/spinlock.h | 3 --- arch/m32r/include/asm/spinlock.h | 12 arch/metag/include/asm/spinlock_lnkget.h | 30 -- arch/metag/include/asm/spinlock_lock1.h | 20 arch/mn10300/include/asm/spinlock.h | 12 arch/parisc/include/asm/spinlock.h | 18 -- arch/powerpc/include/asm/spinlock.h | 3 --- arch/s390/include/asm/spinlock.h | 12 arch/sh/include/asm/spinlock-cas.h | 12 arch/sh/include/asm/spinlock-llsc.h | 12 arch/sparc/include/asm/spinlock_32.h | 3 --- arch/tile/include/asm/spinlock_32.h | 16 arch/tile/include/asm/spinlock_64.h | 18 -- arch/xtensa/include/asm/spinlock.h | 2 -- include/asm-generic/qrwlock.h| 20 include/linux/rwlock.h | 3 --- include/linux/spinlock.h | 11 --- include/linux/spinlock_up.h | 3 --- kernel/locking/spinlock.c| 6 ++ 24 files changed, 2 insertions(+), 253 deletions(-) diff --git a/arch/alpha/include/asm/spinlock.h b/arch/alpha/include/asm/spinlock.h index 718ac0b..7bff6316 100644 --- a/arch/alpha/include/asm/spinlock.h +++ b/arch/alpha/include/asm/spinlock.h @@ -54,16 +54,6 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock) /***/ -static inline int arch_read_can_lock(arch_rwlock_t *lock) -{ - return (lock->lock & 1) == 0; -} - -static inline int arch_write_can_lock(arch_rwlock_t *lock) -{ - return lock->lock == 0; -} - static inline void arch_read_lock(arch_rwlock_t *lock) { long regx; diff --git a/arch/arc/include/asm/spinlock.h b/arch/arc/include/asm/spinlock.h index 47efc84..ce9bfcf 100644 --- a/arch/arc/include/asm/spinlock.h +++ b/arch/arc/include/asm/spinlock.h @@ -410,9 +410,6 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) #endif -#define arch_read_can_lock(x) ((x)->counter > 0) -#define arch_write_can_lock(x) ((x)->counter == __ARCH_RW_LOCK_UNLOCKED__) - #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) diff --git a/arch/arm/include/asm/spinlock.h b/arch/arm/include/asm/spinlock.h index c030143..f522326 100644 --- a/arch/arm/include/asm/spinlock.h +++ b/arch/arm/include/asm/spinlock.h @@ -193,9 +193,6 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) dsb_sev(); } -/* write_can_lock - would write_trylock() succeed? 
*/ -#define arch_write_can_lock(x) (ACCESS_ONCE((x)->lock) == 0) - /* * Read locks are a bit more hairy: * - Exclusively load the lock value. @@ -273,9 +270,6 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) } } -/* read_can_lock - would read_trylock() succeed? */ -#define arch_read_can_lock(x) (ACCESS_ONCE((x)->lock) < 0x80000000) - #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) diff --git a/arch/blackfin/include/asm/spinlock.h b/arch/blackfin/include/asm/spinlock.h index f643143..607ef98 100644 --- a/arch/blackfin/include/asm/spinlock.h +++ b/arch/blackfin/include/asm/spinlock.h @@ -48,16 +48,6 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) __raw_spin_unlock_asm(&lock->lock); } -static inline int arch_read_can_lock(arch_rwlock_t *rw) -{ - return
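Stripped of the macro plumbing, the BUILD_LOCK_OPS change amounts to spinning on the trylock itself instead of polling a separate, unsynchronized probe. A simplified sketch of the resulting shape (not the actual macro expansion):

    static void demo_write_lock(rwlock_t *lock)
    {
        while (!do_raw_write_trylock(lock))
            cpu_relax();    /* the trylock answer is authoritative;
                             * write_can_lock() on a qrwlock was not */
    }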
[tip:perf/urgent] perf/core: Avoid freeing static PMU contexts when PMU is unregistered
Commit-ID: df0062b27ebf473b372914a3e3574d93790e2b72 Gitweb: https://git.kernel.org/tip/df0062b27ebf473b372914a3e3574d93790e2b72 Author: Will Deacon AuthorDate: Tue, 3 Oct 2017 15:20:50 +0100 Committer: Ingo Molnar CommitDate: Tue, 10 Oct 2017 10:06:54 +0200 perf/core: Avoid freeing static PMU contexts when PMU is unregistered Since commit: 1fd7e4169954 ("perf/core: Remove perf_cpu_context::unique_pmu") ... when a PMU is unregistered then its associated ->pmu_cpu_context is unconditionally freed. Whilst this is fine for dynamically allocated context types (i.e. those registered using perf_invalid_context), this causes a problem for sharing of static contexts such as perf_{sw,hw}_context, which are used by multiple built-in PMUs and effectively have a global lifetime. Whilst testing the ARM SPE driver, which must use perf_sw_context to support per-task AUX tracing, unregistering the driver as a result of a module unload resulted in: Unable to handle kernel NULL pointer dereference at virtual address 0038 Internal error: Oops: 9604 [#1] PREEMPT SMP Modules linked in: [last unloaded: arm_spe_pmu] PC is at ctx_resched+0x38/0xe8 LR is at perf_event_exec+0x20c/0x278 [...] ctx_resched+0x38/0xe8 perf_event_exec+0x20c/0x278 setup_new_exec+0x88/0x118 load_elf_binary+0x26c/0x109c search_binary_handler+0x90/0x298 do_execveat_common.isra.14+0x540/0x618 SyS_execve+0x38/0x48 since the software context has been freed and the ctx.pmu->pmu_disable_count field has been set to NULL. This patch fixes the problem by avoiding the freeing of static PMU contexts altogether. Whilst the sharing of dynamic contexts is questionable, this actually requires the caller to share their context pointer explicitly and so the burden is on them to manage the object lifetime. Reported-by: Kim Phillips Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Acked-by: Mark Rutland Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 1fd7e4169954 ("perf/core: Remove perf_cpu_context::unique_pmu") Link: http://lkml.kernel.org/r/1507040450-7730-1-git-send-email-will.dea...@arm.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 8 1 file changed, 8 insertions(+) diff --git a/kernel/events/core.c b/kernel/events/core.c index 6bc21e2..243bfc6 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8955,6 +8955,14 @@ static struct perf_cpu_context __percpu *find_pmu_context(int ctxn) static void free_pmu_context(struct pmu *pmu) { + /* +* Static contexts such as perf_sw_context have a global lifetime +* and may be shared between different PMUs. Avoid freeing them +* when a single PMU is going away. +*/ + if (pmu->task_ctx_nr > perf_invalid_context) + return; + mutex_lock(_lock); free_percpu(pmu->pmu_cpu_context); mutex_unlock(_lock);
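The one-line test works because of how the context indices are ordered in include/linux/perf_event.h of this era: dynamic contexts keep perf_invalid_context, while the shared static ones sit above it:

    enum perf_event_task_context {
        perf_invalid_context = -1,  /* per-PMU, dynamically allocated */
        perf_hw_context = 0,        /* shared by hardware PMUs */
        perf_sw_context,            /* shared by software PMUs */
        perf_nr_task_contexts,
    };

so task_ctx_nr > perf_invalid_context is true exactly for the contexts with a global lifetime that must survive any single PMU's unregistration.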
[tip:perf/core] perf/aux: Ensure aux_wakeup represents most recent wakeup index
Commit-ID: d9a50b0256f06bd39a1bed1ba40baec37c356b11 Gitweb: http://git.kernel.org/tip/d9a50b0256f06bd39a1bed1ba40baec37c356b11 Author: Will Deacon AuthorDate: Wed, 16 Aug 2017 17:18:17 +0100 Committer: Ingo Molnar CommitDate: Fri, 25 Aug 2017 11:04:16 +0200 perf/aux: Ensure aux_wakeup represents most recent wakeup index The aux_watermark member of struct ring_buffer represents the period (in terms of bytes) at which wakeup events should be generated when data is written to the aux buffer in non-snapshot mode. On hardware that cannot generate an interrupt when the aux_head reaches an arbitrary wakeup index (such as ARM SPE), the aux_head sampled from handle->head in perf_aux_output_{skip,end} may in fact be past the wakeup index. This can lead to wakeup slowly falling behind the head. For example, consider the case where hardware can only generate an interrupt on a page-boundary and the aux buffer is initialised as follows: // Buffer size is 2 * PAGE_SIZE rb->aux_head = rb->aux_wakeup = 0 rb->aux_watermark = PAGE_SIZE / 2 following the first perf_aux_output_begin call, the handle is initialised with: handle->head = 0 handle->size = 2 * PAGE_SIZE handle->wakeup = PAGE_SIZE / 2 and the hardware will be programmed to generate an interrupt at PAGE_SIZE. When the interrupt is raised, the hardware head will be at PAGE_SIZE, so calling perf_aux_output_end(handle, PAGE_SIZE) puts the ring buffer into the following state: rb->aux_head = PAGE_SIZE rb->aux_wakeup = PAGE_SIZE / 2 rb->aux_watermark = PAGE_SIZE / 2 and then the next call to perf_aux_output_begin will result in: handle->head = handle->wakeup = PAGE_SIZE for which the semantics are unclear and, for a smaller aux_watermark (e.g. PAGE_SIZE / 4), then the wakeup would in fact be behind head at this point. This patch fixes the problem by rounding down the aux_head (as sampled from the handle) to the nearest aux_watermark boundary when updating rb->aux_wakeup, therefore taking into account any overruns by the hardware. 
Reported-by: Mark Rutland Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Acked-by: Alexander Shishkin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arm-ker...@lists.infradead.org Link: http://lkml.kernel.org/r/1502900297-21839-2-git-send-email-will.dea...@arm.com Signed-off-by: Ingo Molnar --- kernel/events/internal.h| 2 +- kernel/events/ring_buffer.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 2941b86..5377c59 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -40,7 +40,7 @@ struct ring_buffer { /* AUX area */ longaux_head; local_t aux_nest; - longaux_wakeup; + longaux_wakeup; /* last aux_watermark boundary crossed by aux_head */ unsigned long aux_pgoff; int aux_nr_pages; int aux_overwrite; diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 25437fd..af71a84e 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -453,7 +453,7 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) rb->user_page->aux_head = rb->aux_head; if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) { wakeup = true; - rb->aux_wakeup += rb->aux_watermark; + rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark); } if (wakeup) { @@ -486,7 +486,7 @@ int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size) rb->user_page->aux_head = rb->aux_head; if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) { perf_output_wakeup(handle); - rb->aux_wakeup += rb->aux_watermark; + rb->aux_wakeup = rounddown(rb->aux_head, rb->aux_watermark); handle->wakeup = rb->aux_wakeup + rb->aux_watermark; }
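The drift is easy to reproduce outside the kernel. A standalone user-space model of the commit's example (values illustrative; rounddown() re-implemented inline to match the kernel macro's semantics):

    #include <stdio.h>

    #define WATERMARK 2048UL            /* PAGE_SIZE / 2 with 4K pages */

    int main(void)
    {
        unsigned long aux_head = 4096;  /* hw interrupted on a page boundary */
        unsigned long old_wakeup = 0 + WATERMARK;   /* the old "+=" scheme */
        unsigned long new_wakeup = aux_head - (aux_head % WATERMARK);

        printf("head=%lu old=%lu (lags head) new=%lu (tracks head)\n",
               aux_head, old_wakeup, new_wakeup);
        return 0;
    }

This prints head=4096 old=2048 (lags head) new=4096 (tracks head), matching the ring-buffer state walked through in the commit message.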
[tip:perf/core] perf/aux: Make aux_{head,wakeup} ring_buffer members long
Commit-ID: 2ab346cfb0decf01523949e29f5cf542f2304611 Gitweb: http://git.kernel.org/tip/2ab346cfb0decf01523949e29f5cf542f2304611 Author: Will Deacon AuthorDate: Wed, 16 Aug 2017 17:18:16 +0100 Committer: Ingo Molnar CommitDate: Fri, 25 Aug 2017 11:04:15 +0200 perf/aux: Make aux_{head,wakeup} ring_buffer members long The aux_head and aux_wakeup members of struct ring_buffer are defined using the local_t type, despite the fact that they are only accessed via the perf_aux_output_*() functions, which cannot race with each other for a given ring buffer. This patch changes the type of the members to long, so we can avoid using the local_*() API where it isn't needed. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Linus Torvalds Cc: Mark Rutland Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arm-ker...@lists.infradead.org Link: http://lkml.kernel.org/r/1502900297-21839-1-git-send-email-will.dea...@arm.com Signed-off-by: Ingo Molnar --- kernel/events/internal.h| 4 ++-- kernel/events/ring_buffer.c | 31 ++- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/kernel/events/internal.h b/kernel/events/internal.h index 486fd78..2941b86 100644 --- a/kernel/events/internal.h +++ b/kernel/events/internal.h @@ -38,9 +38,9 @@ struct ring_buffer { struct user_struct *mmap_user; /* AUX area */ - local_t aux_head; + longaux_head; local_t aux_nest; - local_t aux_wakeup; + longaux_wakeup; unsigned long aux_pgoff; int aux_nr_pages; int aux_overwrite; diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index ee97196..25437fd 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -367,7 +367,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, if (WARN_ON_ONCE(local_xchg(>aux_nest, 1))) goto err_put; - aux_head = local_read(>aux_head); + aux_head = rb->aux_head; handle->rb = rb; handle->event = event; @@ -382,7 +382,7 @@ void *perf_aux_output_begin(struct perf_output_handle *handle, */ if (!rb->aux_overwrite) { aux_tail = ACCESS_ONCE(rb->user_page->aux_tail); - handle->wakeup = local_read(>aux_wakeup) + rb->aux_watermark; + handle->wakeup = rb->aux_wakeup + rb->aux_watermark; if (aux_head - aux_tail < perf_aux_size(rb)) handle->size = CIRC_SPACE(aux_head, aux_tail, perf_aux_size(rb)); @@ -433,12 +433,12 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) handle->aux_flags |= PERF_AUX_FLAG_OVERWRITE; aux_head = handle->head; - local_set(>aux_head, aux_head); + rb->aux_head = aux_head; } else { handle->aux_flags &= ~PERF_AUX_FLAG_OVERWRITE; - aux_head = local_read(>aux_head); - local_add(size, >aux_head); + aux_head = rb->aux_head; + rb->aux_head += size; } if (size || handle->aux_flags) { @@ -450,11 +450,10 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) handle->aux_flags); } - aux_head = rb->user_page->aux_head = local_read(>aux_head); - - if (aux_head - local_read(>aux_wakeup) >= rb->aux_watermark) { + rb->user_page->aux_head = rb->aux_head; + if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) { wakeup = true; - local_add(rb->aux_watermark, >aux_wakeup); + rb->aux_wakeup += rb->aux_watermark; } if (wakeup) { @@ -478,22 +477,20 @@ void perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size) { struct ring_buffer *rb = handle->rb; - unsigned long aux_head; if (size > handle->size) return -ENOSPC; - local_add(size, >aux_head); + 
rb->aux_head += size; - aux_head = rb->user_page->aux_head = local_read(&rb->aux_head); - if (aux_head - local_read(&rb->aux_wakeup) >= rb->aux_watermark) { + rb->user_page->aux_head = rb->aux_head; + if (rb->aux_head - rb->aux_wakeup >= rb->aux_watermark) { perf_output_wakeup(handle); - local_add(rb->aux_watermark, &rb->aux_wakeup); - handle->wakeup = local_read(&rb->aux_wakeup) + -rb->aux_watermark; + rb->aux_wakeup += rb->aux_watermark; + handle->wakeup =
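The justification, restated: perf_aux_output_begin() takes rb->aux_nest as a nesting flag before any of these fields are touched, so each ring buffer has at most one writer at a time and the atomic local_*() accessors buy nothing. The conversion is then mechanical, for example:

    /* before */                              /* after */
    local_add(size, &rb->aux_head);           rb->aux_head += size;
    aux_head = local_read(&rb->aux_head);     aux_head = rb->aux_head;

Note that aux_nest itself stays a local_t, since it is the field providing the mutual exclusion in the first place.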
[tip:timers/urgent] arm64/vdso: Fix nsec handling for CLOCK_MONOTONIC_RAW
Commit-ID: dbb236c1ceb697a559e0694ac4c9e7b9131d0b16 Gitweb: http://git.kernel.org/tip/dbb236c1ceb697a559e0694ac4c9e7b9131d0b16 Author: Will Deacon AuthorDate: Thu, 8 Jun 2017 16:44:22 -0700 Committer: Thomas Gleixner CommitDate: Tue, 20 Jun 2017 10:41:51 +0200 arm64/vdso: Fix nsec handling for CLOCK_MONOTONIC_RAW Recently vDSO support for CLOCK_MONOTONIC_RAW was added in 49eea433b326 ("arm64: Add support for CLOCK_MONOTONIC_RAW in clock_gettime() vDSO"). Noticing that the core timekeeping code never set tkr_raw.xtime_nsec, the vDSO implementation didn't bother exposing it via the data page and instead took the unshifted tk->raw_time.tv_nsec value which was then immediately shifted left in the vDSO code. Unfortunately, by accellerating the MONOTONIC_RAW clockid, it uncovered potential 1ns time inconsistencies caused by the timekeeping core not handing sub-ns resolution. Now that the core code has been fixed and is actually setting tkr_raw.xtime_nsec, we need to take that into account in the vDSO by adding it to the shifted raw_time value, in order to fix the user-visible inconsistency. Rather than do that at each use (and expand the data page in the process), instead perform the shift/addition operation when populating the data page and remove the shift from the vDSO code entirely. [jstultz: minor whitespace tweak, tried to improve commit message to make it more clear this fixes a regression] Reported-by: John Stultz Signed-off-by: Will Deacon Signed-off-by: John Stultz Tested-by: Daniel Mentz Acked-by: Kevin Brodsky Cc: Prarit Bhargava Cc: Richard Cochran Cc: Stephen Boyd Cc: "stable #4 . 8+" Cc: Miroslav Lichvar Link: http://lkml.kernel.org/r/1496965462-20003-4-git-send-email-john.stu...@linaro.org Signed-off-by: Thomas Gleixner --- arch/arm64/kernel/vdso.c | 5 +++-- arch/arm64/kernel/vdso/gettimeofday.S | 1 - 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 41b6e31..d0cb007 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -221,10 +221,11 @@ void update_vsyscall(struct timekeeper *tk) /* tkr_mono.cycle_last == tkr_raw.cycle_last */ vdso_data->cs_cycle_last= tk->tkr_mono.cycle_last; vdso_data->raw_time_sec = tk->raw_time.tv_sec; - vdso_data->raw_time_nsec= tk->raw_time.tv_nsec; + vdso_data->raw_time_nsec= (tk->raw_time.tv_nsec << + tk->tkr_raw.shift) + + tk->tkr_raw.xtime_nsec; vdso_data->xtime_clock_sec = tk->xtime_sec; vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec; - /* tkr_raw.xtime_nsec == 0 */ vdso_data->cs_mono_mult = tk->tkr_mono.mult; vdso_data->cs_raw_mult = tk->tkr_raw.mult; /* tkr_mono.shift == tkr_raw.shift */ diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index e00b467..76320e9 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -256,7 +256,6 @@ monotonic_raw: seqcnt_check fail=monotonic_raw /* All computations are done with left-shifted nsecs. */ - lsl x14, x14, x12 get_nsec_per_sec res=x9 lsl x9, x9, x12
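A worked example with made-up numbers makes the data-page change concrete. Suppose tkr_raw.shift = 4, raw_time.tv_nsec = 10 and tkr_raw.xtime_nsec = 7 (seven 1/16-ns units of sub-ns remainder):

    /* old: publish 10; vDSO computes 10 << 4 = 160, remainder lost   */
    /* new: publish the pre-shifted sum once per timekeeping update:  */
    vdso_data->raw_time_nsec = (10 << 4) + 7;   /* = 167 shifted units */

The vDSO then consumes raw_time_nsec directly, which is why the lsl instruction can simply be deleted from gettimeofday.S.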
[tip:perf/core] perf/core: Keep AUX flags in the output handle
Commit-ID: f4c0b0aa58d9b7e30ab0a95e33da84d53b3d764a Gitweb: http://git.kernel.org/tip/f4c0b0aa58d9b7e30ab0a95e33da84d53b3d764a Author: Will Deacon AuthorDate: Mon, 20 Feb 2017 15:33:50 +0200 Committer: Ingo Molnar CommitDate: Thu, 16 Mar 2017 09:51:10 +0100 perf/core: Keep AUX flags in the output handle In preparation for adding more flags to perf AUX records, introduce a separate API for setting the flags for a session, rather than appending more bool arguments to perf_aux_output_end. This allows to set each flag at the time a corresponding condition is detected, instead of tracking it in each driver's private state. Signed-off-by: Will Deacon Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mathieu Poirier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vi...@deater.net Link: http://lkml.kernel.org/r/20170220133352.17995-3-alexander.shish...@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/bts.c | 16 +-- arch/x86/events/intel/pt.c | 17 ++-- arch/x86/events/intel/pt.h | 1 - drivers/hwtracing/coresight/coresight-etb10.c| 9 +++ drivers/hwtracing/coresight/coresight-etm-perf.c | 9 +++ drivers/hwtracing/coresight/coresight-priv.h | 2 -- drivers/hwtracing/coresight/coresight-tmc-etf.c | 7 +++-- include/linux/coresight.h| 2 +- include/linux/perf_event.h | 8 +++--- kernel/events/ring_buffer.c | 34 10 files changed, 56 insertions(+), 49 deletions(-) diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 982c9e3..8ae8c5c 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -63,7 +63,6 @@ struct bts_buffer { unsigned intcur_buf; boolsnapshot; local_t data_size; - local_t lost; local_t head; unsigned long end; void**data_pages; @@ -199,7 +198,8 @@ static void bts_update(struct bts_ctx *bts) return; if (ds->bts_index >= ds->bts_absolute_maximum) - local_inc(>lost); + perf_aux_output_flag(>handle, +PERF_AUX_FLAG_TRUNCATED); /* * old and head are always in the same physical buffer, so we @@ -276,7 +276,7 @@ static void bts_event_start(struct perf_event *event, int flags) return; fail_end_stop: - perf_aux_output_end(>handle, 0, false); + perf_aux_output_end(>handle, 0); fail_stop: event->hw.state = PERF_HES_STOPPED; @@ -319,9 +319,8 @@ static void bts_event_stop(struct perf_event *event, int flags) bts->handle.head = local_xchg(>data_size, buf->nr_pages << PAGE_SHIFT); - - perf_aux_output_end(>handle, local_xchg(>data_size, 0), - !!local_xchg(>lost, 0)); + perf_aux_output_end(>handle, + local_xchg(>data_size, 0)); } cpuc->ds->bts_index = bts->ds_back.bts_buffer_base; @@ -484,8 +483,7 @@ int intel_bts_interrupt(void) if (old_head == local_read(>head)) return handled; - perf_aux_output_end(>handle, local_xchg(>data_size, 0), - !!local_xchg(>lost, 0)); + perf_aux_output_end(>handle, local_xchg(>data_size, 0)); buf = perf_aux_output_begin(>handle, event); if (buf) @@ -500,7 +498,7 @@ int intel_bts_interrupt(void) * cleared handle::event */ barrier(); - perf_aux_output_end(>handle, 0, false); + perf_aux_output_end(>handle, 0); } } diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 5900471..0218728 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -753,7 +753,8 @@ static void pt_handle_status(struct pt *pt) */ if (!pt_cap_get(PT_CAP_topa_multiple_entries) || buf->output_off == sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size)) { - local_inc(>lost); 
+ perf_aux_output_flag(&pt->handle, +PERF_AUX_FLAG_TRUNCATED); advance++; } } @@ -846,8 +847,10 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf, /* can't stop in the middle of
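From a driver's perspective the new interface looks like the sketch below (drv and hw_buffer_overflowed are hypothetical; only perf_aux_output_flag() and perf_aux_output_end() come from this patch):

    if (hw_buffer_overflowed)
        perf_aux_output_flag(&drv->handle, PERF_AUX_FLAG_TRUNCATED);
    /* ... further conditions may set further flags ... */
    perf_aux_output_end(&drv->handle, bytes_written);

Flags accumulate in the handle as conditions are detected and are emitted with the AUX record at _end() time, so drivers no longer need to mirror them in private state such as the local_t 'lost' counters removed above.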
[tip:perf/core] perf/core: Don't pass PERF_EF_START to the PMU ->start callback
Commit-ID: c9bbdd4830ab06288bb1d8c00ed8c8c6e80e377a Gitweb: http://git.kernel.org/tip/c9bbdd4830ab06288bb1d8c00ed8c8c6e80e377a Author: Will Deacon AuthorDate: Mon, 15 Aug 2016 11:42:45 +0100 Committer: Ingo Molnar CommitDate: Mon, 5 Sep 2016 13:19:18 +0200 perf/core: Don't pass PERF_EF_START to the PMU ->start callback PERF_EF_START is a flag to indicate to the PMU ->add() callback that, as well as claiming the PMU resources required by the event being added, it should also start the PMU. Passing this flag to the ->start() callback doesn't make sense, because ->start() always tries to start the PMU. Remove it. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: mark.rutl...@arm.com Link: http://lkml.kernel.org/r/1471257765-29662-1-git-send-email-will.dea...@arm.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index dff00c7..74f22a9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2492,7 +2492,7 @@ static int __perf_event_stop(void *info) * while restarting. */ if (sd->restart) - event->pmu->start(event, PERF_EF_START); + event->pmu->start(event, 0); return 0; }
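For PMU authors, the resulting contract, sketched with hypothetical mypmu_* callbacks: PERF_EF_START is an instruction to ->add() only, while ->start() has its own flag space (PERF_EF_RELOAD):

    static int mypmu_add(struct perf_event *event, int flags)
    {
        /* ... claim the hardware counter ... */
        if (flags & PERF_EF_START)
            mypmu_start(event, 0);  /* don't forward EF_START */
        return 0;
    }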
[tip:perf/urgent] perf/core: Use this_cpu_ptr() when stopping AUX events
Commit-ID: 8b6a3fe8fab97716990a3abde1a01fb5a34552a3 Gitweb: http://git.kernel.org/tip/8b6a3fe8fab97716990a3abde1a01fb5a34552a3 Author: Will Deacon AuthorDate: Wed, 24 Aug 2016 10:07:14 +0100 Committer: Ingo Molnar CommitDate: Wed, 24 Aug 2016 15:03:10 +0200 perf/core: Use this_cpu_ptr() when stopping AUX events When tearing down an AUX buf for an event via perf_mmap_close(), __perf_event_output_stop() is called on the event's CPU to ensure that trace generation is halted before the process of unmapping and freeing the buffer pages begins. The callback is performed via cpu_function_call(), which ensures that it runs with interrupts disabled and is therefore not preemptible. Unfortunately, the current code grabs the per-cpu context pointer using get_cpu_ptr(), which unnecessarily disables preemption and doesn't pair the call with put_cpu_ptr(), leading to a preempt_count() imbalance and a BUG when freeing the AUX buffer later on: WARNING: CPU: 1 PID: 2249 at kernel/events/ring_buffer.c:539 __rb_free_aux+0x10c/0x120 Modules linked in: [...] Call Trace: [] dump_stack+0x4f/0x72 [] __warn+0xc6/0xe0 [] warn_slowpath_null+0x18/0x20 [] __rb_free_aux+0x10c/0x120 [] rb_free_aux+0x13/0x20 [] perf_mmap_close+0x29e/0x2f0 [] ? perf_iterate_ctx+0xe0/0xe0 [] remove_vma+0x25/0x60 [] exit_mmap+0x106/0x140 [] mmput+0x1c/0xd0 [] do_exit+0x253/0xbf0 [] do_group_exit+0x3e/0xb0 [] get_signal+0x249/0x640 [] do_signal+0x23/0x640 [] ? _raw_write_unlock_irq+0x12/0x30 [] ? _raw_spin_unlock_irq+0x9/0x10 [] ? __schedule+0x2c6/0x710 [] exit_to_usermode_loop+0x74/0x90 [] prepare_exit_to_usermode+0x26/0x30 [] retint_user+0x8/0x10 This patch uses this_cpu_ptr() instead of get_cpu_ptr(), since preemption is already disabled by the caller. Signed-off-by: Will Deacon Reviewed-by: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vince Weaver Fixes: 95ff4ca26c49 ("perf/core: Free AUX pages in unmap path") Link: http://lkml.kernel.org/r/20160824091905.ga16...@arm.com Signed-off-by: Ingo Molnar --- kernel/events/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 5650f53..3cfabdf 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -6166,7 +6166,7 @@ static int __perf_pmu_output_stop(void *info) { struct perf_event *event = info; struct pmu *pmu = event->pmu; - struct perf_cpu_context *cpuctx = get_cpu_ptr(pmu->pmu_cpu_context); + struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); struct remote_output ro = { .rb = event->rb, };
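The general rule the fix applies: where preemption is already disabled (IPIs, cpu_function_call() callbacks, irqs-off regions), use this_cpu_ptr(); get_cpu_ptr() is for preemptible callers and must always be paired with put_cpu_ptr():

    /* preemptible caller */
    cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
    /* ... use cpuctx ... */
    put_cpu_ptr(pmu->pmu_cpu_context);

    /* preemption already off, as in __perf_pmu_output_stop() */
    cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);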
[tip:locking/core] locking/barriers, arch/arm64: Implement LDXR+WFE based smp_cond_load_acquire()
Commit-ID:  03e3c2b7edbe1e8758196b2c784eb328063d
Gitweb:     http://git.kernel.org/tip/03e3c2b7edbe1e8758196b2c784eb328063d
Author:     Will Deacon
AuthorDate: Mon, 27 Jun 2016 18:43:54 +0100
Committer:  Ingo Molnar
CommitDate: Thu, 7 Jul 2016 09:10:53 +0200

locking/barriers, arch/arm64: Implement LDXR+WFE based smp_cond_load_acquire()

smp_cond_load_acquire() is used to spin on a variable until some
expression involving that variable becomes true.

On arm64, we can build this using the LDXR and WFE instructions, since
clearing of the exclusive monitor as a result of the variable being
changed by another CPU generates an event, which will wake us up out
of WFE.

This patch implements smp_cond_load_acquire() using LDXR and WFE,
which themselves are contained in an internal __cmpwait() function.

Signed-off-by: Will Deacon
Signed-off-by: Peter Zijlstra (Intel)
Cc: Andrew Morton
Cc: Linus Torvalds
Cc: Paul E. McKenney
Cc: Peter Zijlstra
Cc: Thomas Gleixner
Cc: catalin.mari...@arm.com
Cc: linux-arm-ker...@lists.infradead.org
Link: http://lkml.kernel.org/r/1467049434-30451-1-git-send-email-will.dea...@arm.com
Signed-off-by: Ingo Molnar
---
 arch/arm64/include/asm/barrier.h | 13 ++
 arch/arm64/include/asm/cmpxchg.h | 51
 2 files changed, 64 insertions(+)

diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h
index dae5c49..4eea7f6 100644
--- a/arch/arm64/include/asm/barrier.h
+++ b/arch/arm64/include/asm/barrier.h
@@ -91,6 +91,19 @@ do {									\
 	__u.__val;							\
 })
 
+#define smp_cond_load_acquire(ptr, cond_expr)				\
+({									\
+	typeof(ptr) __PTR = (ptr);					\
+	typeof(*ptr) VAL;						\
+	for (;;) {							\
+		VAL = smp_load_acquire(__PTR);				\
+		if (cond_expr)						\
+			break;						\
+		__cmpwait_relaxed(__PTR, VAL);				\
+	}								\
+	VAL;								\
+})
+
 #include <asm-generic/barrier.h>
 
 #endif	/* __ASSEMBLY__ */
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index 510c7b4..bd86a79 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -224,4 +224,55 @@ __CMPXCHG_GEN(_mb)
 	__ret;								\
 })
 
+#define __CMPWAIT_CASE(w, sz, name)					\
+static inline void __cmpwait_case_##name(volatile void *ptr,		\
+					 unsigned long val)		\
+{									\
+	unsigned long tmp;						\
+									\
+	asm volatile(							\
+	"	ldxr" #sz "\t%" #w "[tmp], %[v]\n"			\
+	"	eor	%" #w "[tmp], %" #w "[tmp], %" #w "[val]\n"	\
+	"	cbnz	%" #w "[tmp], 1f\n"				\
+	"	wfe\n"							\
+	"1:"								\
+	: [tmp] "=&r" (tmp), [v] "+Q" (*(unsigned long *)ptr)		\
+	: [val] "r" (val));						\
+}
+
+__CMPWAIT_CASE(w, b, 1);
+__CMPWAIT_CASE(w, h, 2);
+__CMPWAIT_CASE(w,  , 4);
+__CMPWAIT_CASE( ,  , 8);
+
+#undef __CMPWAIT_CASE
+
+#define __CMPWAIT_GEN(sfx)						\
+static inline void __cmpwait##sfx(volatile void *ptr,			\
+				  unsigned long val,			\
+				  int size)				\
+{									\
+	switch (size) {							\
+	case 1:								\
+		return __cmpwait_case##sfx##_1(ptr, (u8)val);		\
+	case 2:								\
+		return __cmpwait_case##sfx##_2(ptr, (u16)val);		\
+	case 4:								\
+		return __cmpwait_case##sfx##_4(ptr, val);		\
+	case 8:								\
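As a usage sketch (hypothetical consumer code, not from the patch):
a reader can wait for a producer's flag with smp_cond_load_acquire().
VAL is the special name the macro binds to each loaded value, and on
arm64 the retry path now parks in WFE instead of busy-spinning.

#include <linux/atomic.h>	/* assumed to pull in smp_cond_load_acquire() */

struct shared {
	int data;	/* written by the producer before 'flag' */
	int flag;	/* set to 1 by the producer */
};

static void publish(struct shared *s, int value)
{
	s->data = value;
	smp_store_release(&s->flag, 1);	/* pairs with the acquire load */
}

static int wait_for_data(struct shared *s)
{
	/* Loops: acquire-load 'flag', then LDXR+WFE wait until it changes. */
	smp_cond_load_acquire(&s->flag, VAL != 0);
	return s->data;	/* ordered after the flag load by acquire semantics */
}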
[tip:locking/arch-atomic] locking/atomic, arch/arm64: Generate LSE non-return cases using common macros
Commit-ID:  6822a84dd4e35a1beb70028e46b5f60c14fc422d
Gitweb:     http://git.kernel.org/tip/6822a84dd4e35a1beb70028e46b5f60c14fc422d
Author:     Will Deacon
AuthorDate: Fri, 22 Apr 2016 18:01:32 +0100
Committer:  Ingo Molnar
CommitDate: Thu, 16 Jun 2016 10:48:22 +0200

locking/atomic, arch/arm64: Generate LSE non-return cases using common macros

atomic[64]_{add,and,andnot,or,xor} all follow the same patterns, so
generate them using macros, like we do for the LL/SC case already.

Signed-off-by: Will Deacon
Signed-off-by: Peter Zijlstra (Intel)
Cc: Andrew Morton
Cc: Ard Biesheuvel
Cc: Catalin Marinas
Cc: Linus Torvalds
Cc: Lorenzo Pieralisi
Cc: Paul E. McKenney
Cc: Peter Zijlstra
Cc: Steve Capper
Cc: Thomas Gleixner
Cc: linux-a...@vger.kernel.org
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/1461344493-8262-1-git-send-email-will.dea...@arm.com
Signed-off-by: Ingo Molnar
---
 arch/arm64/include/asm/atomic_lse.h | 122 ++--
 1 file changed, 32 insertions(+), 90 deletions(-)

diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 39c1d34..37a0f03 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -26,54 +26,25 @@
 #endif
 
 #define __LL_SC_ATOMIC(op)	__LL_SC_CALL(atomic_##op)
-
-static inline void atomic_andnot(int i, atomic_t *v)
-{
-	register int w0 asm ("w0") = i;
-	register atomic_t *x1 asm ("x1") = v;
-
-	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(andnot),
-	"	stclr	%w[i], %[v]\n")
-	: [i] "+r" (w0), [v] "+Q" (v->counter)
-	: "r" (x1)
-	: __LL_SC_CLOBBERS);
-}
-
-static inline void atomic_or(int i, atomic_t *v)
-{
-	register int w0 asm ("w0") = i;
-	register atomic_t *x1 asm ("x1") = v;
-
-	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(or),
-	"	stset	%w[i], %[v]\n")
-	: [i] "+r" (w0), [v] "+Q" (v->counter)
-	: "r" (x1)
-	: __LL_SC_CLOBBERS);
-}
-
-static inline void atomic_xor(int i, atomic_t *v)
-{
-	register int w0 asm ("w0") = i;
-	register atomic_t *x1 asm ("x1") = v;
-
-	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(xor),
-	"	steor	%w[i], %[v]\n")
-	: [i] "+r" (w0), [v] "+Q" (v->counter)
-	: "r" (x1)
-	: __LL_SC_CLOBBERS);
+#define ATOMIC_OP(op, asm_op)						\
+static inline void atomic_##op(int i, atomic_t *v)			\
+{									\
+	register int w0 asm ("w0") = i;					\
+	register atomic_t *x1 asm ("x1") = v;				\
+									\
+	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(op),		\
+"	" #asm_op "	%w[i], %[v]\n")					\
+	: [i] "+r" (w0), [v] "+Q" (v->counter)				\
+	: "r" (x1)							\
+	: __LL_SC_CLOBBERS);						\
 }
 
-static inline void atomic_add(int i, atomic_t *v)
-{
-	register int w0 asm ("w0") = i;
-	register atomic_t *x1 asm ("x1") = v;
+ATOMIC_OP(andnot, stclr)
+ATOMIC_OP(or, stset)
+ATOMIC_OP(xor, steor)
+ATOMIC_OP(add, stadd)
 
-	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(add),
-	"	stadd	%w[i], %[v]\n")
-	: [i] "+r" (w0), [v] "+Q" (v->counter)
-	: "r" (x1)
-	: __LL_SC_CLOBBERS);
-}
+#undef ATOMIC_OP
 
 #define ATOMIC_OP_ADD_RETURN(name, mb, cl...)				\
 static inline int atomic_add_return##name(int i, atomic_t *v)		\
@@ -167,54 +138,25 @@ ATOMIC_OP_SUB_RETURN(        , al, "memory")
 #undef __LL_SC_ATOMIC
 
 #define __LL_SC_ATOMIC64(op)	__LL_SC_CALL(atomic64_##op)
-
-static inline void atomic64_andnot(long i, atomic64_t *v)
-{
-	register long x0 asm ("x0") = i;
-	register atomic64_t *x1 asm ("x1") = v;
-
-	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(andnot),
-	"	stclr	%[i], %[v]\n")
-	: [i] "+r" (x0), [v] "+Q" (v->counter)
-	: "r" (x1)
-	: __LL_SC_CLOBBERS);
-}
-
-static inline void atomic64_or(long i, atomic64_t *v)
-{
-	register long x0 asm ("x0") = i;
-	register atomic64_t *x1 asm ("x1") = v;
-
-	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(or),
-	"	stset	%[i], %[v]\n")
-	: [i] "+r" (x0), [v] "+Q" (v->counter)
-	: "r" (x1)
-	: __LL_SC_CLOBBERS);
-}
-
-static inline void atomic64_xor(long i, atomic64_t *v)
-{
-	register long x0 asm ("x0") = i;
-	register atomic64_t *x1 asm ("x1") = v;
-
-	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC64(xor),
-	"	steor	%[i], %[v]\n"
-	: [i] "+r" (x0), [v]
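To see that the macro really is just the old code factored out, here is
ATOMIC_OP(or, stset) expanded by hand within this header (it relies on
__LL_SC_ATOMIC, ARM64_LSE_ATOMIC_INSN and __LL_SC_CLOBBERS defined
there) -- compare it with the atomic_or() body deleted above:

/* Hand expansion of ATOMIC_OP(or, stset); identical to the removed atomic_or(). */
static inline void atomic_or(int i, atomic_t *v)
{
	register int w0 asm ("w0") = i;		/* LSE operand register */
	register atomic_t *x1 asm ("x1") = v;	/* kept live for the LL/SC call */

	asm volatile(ARM64_LSE_ATOMIC_INSN(__LL_SC_ATOMIC(or),
"	stset	%w[i], %[v]\n")
	: [i] "+r" (w0), [v] "+Q" (v->counter)
	: "r" (x1)
	: __LL_SC_CLOBBERS);
}

The only per-operation differences are the function name and the LSE
mnemonic, which is exactly what the two macro parameters capture.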
[tip:locking/arch-atomic] locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}() for LSE instructions
Commit-ID:  2efe95fe695270ae1a225805f016303505972d86
Gitweb:     http://git.kernel.org/tip/2efe95fe695270ae1a225805f016303505972d86
Author:     Will Deacon
AuthorDate: Fri, 22 Apr 2016 18:01:33 +0100
Committer:  Ingo Molnar
CommitDate: Thu, 16 Jun 2016 10:48:22 +0200

locking/atomic, arch/arm64: Implement atomic{,64}_fetch_{add,sub,and,andnot,or,xor}{,_relaxed,_acquire,_release}() for LSE instructions

Implement FETCH-OP atomic primitives; these are very similar to the
existing OP-RETURN primitives we already have, except they return the
value of the atomic variable _before_ modification.

This is especially useful for irreversible operations -- such as
bitops (because it becomes impossible to reconstruct the state prior
to modification).

This patch implements the LSE variants.

Signed-off-by: Will Deacon
Signed-off-by: Peter Zijlstra (Intel)
Cc: Andrew Morton
Cc: Ard Biesheuvel
Cc: Catalin Marinas
Cc: Linus Torvalds
Cc: Lorenzo Pieralisi
Cc: Paul E. McKenney
Cc: Peter Zijlstra
Cc: Steve Capper
Cc: Thomas Gleixner
Cc: linux-a...@vger.kernel.org
Cc: linux-arm-ker...@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Link: http://lkml.kernel.org/r/1461344493-8262-2-git-send-email-will.dea...@arm.com
Signed-off-by: Ingo Molnar
---
 arch/arm64/include/asm/atomic_lse.h | 172
 1 file changed, 172 insertions(+)

diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h
index 37a0f03..b5890be 100644
--- a/arch/arm64/include/asm/atomic_lse.h
+++ b/arch/arm64/include/asm/atomic_lse.h
@@ -46,6 +46,38 @@ ATOMIC_OP(add, stadd)
 
 #undef ATOMIC_OP
 
+#define ATOMIC_FETCH_OP(name, mb, op, asm_op, cl...)			\
+static inline int atomic_fetch_##op##name(int i, atomic_t *v)		\
+{									\
+	register int w0 asm ("w0") = i;					\
+	register atomic_t *x1 asm ("x1") = v;				\
+									\
+	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
+	/* LL/SC */							\
+	__LL_SC_ATOMIC(fetch_##op##name),				\
+	/* LSE atomics */						\
+"	" #asm_op #mb "	%w[i], %w[i], %[v]")				\
+	: [i] "+r" (w0), [v] "+Q" (v->counter)				\
+	: "r" (x1)							\
+	: __LL_SC_CLOBBERS, ##cl);					\
+									\
+	return w0;							\
+}
+
+#define ATOMIC_FETCH_OPS(op, asm_op)					\
+	ATOMIC_FETCH_OP(_relaxed,   , op, asm_op)			\
+	ATOMIC_FETCH_OP(_acquire,  a, op, asm_op, "memory")		\
+	ATOMIC_FETCH_OP(_release,  l, op, asm_op, "memory")		\
+	ATOMIC_FETCH_OP(        , al, op, asm_op, "memory")
+
+ATOMIC_FETCH_OPS(andnot, ldclr)
+ATOMIC_FETCH_OPS(or, ldset)
+ATOMIC_FETCH_OPS(xor, ldeor)
+ATOMIC_FETCH_OPS(add, ldadd)
+
+#undef ATOMIC_FETCH_OP
+#undef ATOMIC_FETCH_OPS
+
 #define ATOMIC_OP_ADD_RETURN(name, mb, cl...)				\
 static inline int atomic_add_return##name(int i, atomic_t *v)		\
 {									\
@@ -90,6 +122,33 @@ static inline void atomic_and(int i, atomic_t *v)
 	: __LL_SC_CLOBBERS);
 }
 
+#define ATOMIC_FETCH_OP_AND(name, mb, cl...)				\
+static inline int atomic_fetch_and##name(int i, atomic_t *v)		\
+{									\
+	register int w0 asm ("w0") = i;					\
+	register atomic_t *x1 asm ("x1") = v;				\
+									\
+	asm volatile(ARM64_LSE_ATOMIC_INSN(				\
+	/* LL/SC */							\
+	"	nop\n"							\
+	__LL_SC_ATOMIC(fetch_and##name),				\
+	/* LSE atomics */						\
+	"	mvn	%w[i], %w[i]\n"					\
+	"	ldclr" #mb "	%w[i], %w[i], %[v]")			\
+	: [i] "+r" (w0), [v] "+Q" (v->counter)				\
+	: "r" (x1)							\
+	: __LL_SC_CLOBBERS, ##cl);					\
+									\
+	return w0;
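A sketch of why the _before_ value matters (hypothetical caller, not
from the patch): with atomic_fetch_or(), a caller can set a flag bit
and simultaneously learn whether it was already set. An OP-RETURN
primitive would only hand back old | bit, from which the prior state
of that bit can no longer be reconstructed.

#include <linux/atomic.h>
#include <linux/types.h>

#define CLAIM_BIT	0x1	/* made-up flag bit for illustration */

static bool try_claim(atomic_t *flags)
{
	/* Atomically set CLAIM_BIT and get the value before the OR. */
	int old = atomic_fetch_or(CLAIM_BIT, flags);

	return !(old & CLAIM_BIT);	/* true iff we set the bit first */
}

This is the same test-and-set shape the bitops use, which is why the
commit message singles out irreversible operations as the main
beneficiary of the FETCH-OP family.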