On Wed, 12 Jul 2017 05:40:49 PDT (-0700), boqun.f...@gmail.com wrote: > On Tue, Jul 11, 2017 at 06:31:23PM -0700, Palmer Dabbelt wrote: > [...] >> diff --git a/arch/riscv/include/asm/bitops.h >> b/arch/riscv/include/asm/bitops.h >> new file mode 100644 >> index 000000000000..b0a0c76e966a >> --- /dev/null >> +++ b/arch/riscv/include/asm/bitops.h >> @@ -0,0 +1,216 @@ >> +/* >> + * Copyright (C) 2012 Regents of the University of California >> + * >> + * This program is free software; you can redistribute it and/or >> + * modify it under the terms of the GNU General Public License >> + * as published by the Free Software Foundation, version 2. >> + * >> + * This program is distributed in the hope that it will be useful, >> + * but WITHOUT ANY WARRANTY; without even the implied warranty of >> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the >> + * GNU General Public License for more details. >> + */ >> + >> +#ifndef _ASM_RISCV_BITOPS_H >> +#define _ASM_RISCV_BITOPS_H >> + >> +#ifndef _LINUX_BITOPS_H >> +#error "Only <linux/bitops.h> can be included directly" >> +#endif /* _LINUX_BITOPS_H */ >> + >> +#include <linux/compiler.h> >> +#include <linux/irqflags.h> >> +#include <asm/barrier.h> >> +#include <asm/bitsperlong.h> >> + >> +#ifndef smp_mb__before_clear_bit >> +#define smp_mb__before_clear_bit() smp_mb() >> +#define smp_mb__after_clear_bit() smp_mb() >> +#endif /* smp_mb__before_clear_bit */ >> + >> +#include <asm-generic/bitops/__ffs.h> >> +#include <asm-generic/bitops/ffz.h> >> +#include <asm-generic/bitops/fls.h> >> +#include <asm-generic/bitops/__fls.h> >> +#include <asm-generic/bitops/fls64.h> >> +#include <asm-generic/bitops/find.h> >> +#include <asm-generic/bitops/sched.h> >> +#include <asm-generic/bitops/ffs.h> >> + >> +#include <asm-generic/bitops/hweight.h> >> + >> +#if (BITS_PER_LONG == 64) >> +#define __AMO(op) "amo" #op ".d" >> +#elif (BITS_PER_LONG == 32) >> +#define __AMO(op) "amo" #op ".w" >> +#else >> +#error "Unexpected BITS_PER_LONG" >> +#endif >> + > > So the test_and_{set,change,clear}_bit() have the similar semantics as > cmpxchg(), so > >> +#define __test_and_op_bit(op, mod, nr, addr) \ >> +({ \ >> + unsigned long __res, __mask; \ >> + __mask = BIT_MASK(nr); \ >> + __asm__ __volatile__ ( \ >> + __AMO(op) " %0, %2, %1" \ > > ... "aqrl" bit is needed here, and > >> + : "=r" (__res), "+A" (addr[BIT_WORD(nr)]) \ >> + : "r" (mod(__mask))); \ > > ... "memory" clobber is needed here. > >> + ((__res & __mask) != 0); \ >> +}) >> + >> +#define __op_bit(op, mod, nr, addr) \ >> + __asm__ __volatile__ ( \ >> + __AMO(op) " zero, %1, %0" \ >> + : "+A" (addr[BIT_WORD(nr)]) \ >> + : "r" (mod(BIT_MASK(nr)))) >> + >> +/* Bitmask modifiers */ >> +#define __NOP(x) (x) >> +#define __NOT(x) (~(x)) >> + >> +/** >> + * test_and_set_bit - Set a bit and return its old value >> + * @nr: Bit to set >> + * @addr: Address to count from >> + * >> + * This operation is atomic and cannot be reordered. >> + * It may be reordered on other architectures than x86. >> + * It also implies a memory barrier. >> + */ >> +static inline int test_and_set_bit(int nr, volatile unsigned long *addr) >> +{ >> + return __test_and_op_bit(or, __NOP, nr, addr); >> +} >> + >> +/** >> + * test_and_clear_bit - Clear a bit and return its old value >> + * @nr: Bit to clear >> + * @addr: Address to count from >> + * >> + * This operation is atomic and cannot be reordered. >> + * It can be reordered on other architectures other than x86. >> + * It also implies a memory barrier. >> + */ >> +static inline int test_and_clear_bit(int nr, volatile unsigned long *addr) >> +{ >> + return __test_and_op_bit(and, __NOT, nr, addr); >> +} >> + >> +/** >> + * test_and_change_bit - Change a bit and return its old value >> + * @nr: Bit to change >> + * @addr: Address to count from >> + * >> + * This operation is atomic and cannot be reordered. >> + * It also implies a memory barrier. >> + */ >> +static inline int test_and_change_bit(int nr, volatile unsigned long *addr) >> +{ >> + return __test_and_op_bit(xor, __NOP, nr, addr); >> +} >> + >> +/** >> + * set_bit - Atomically set a bit in memory >> + * @nr: the bit to set >> + * @addr: the address to start counting from >> + * >> + * This function is atomic and may not be reordered. See __set_bit() > > This is incorrect, {set,change,clear}_bit() can be reordered, see > Documentation/memory-barriers.txt, they are just relaxed atomics. But I > think you just copy this from x86 code, so maybe x86 code needs help > too, at least claim that's only x86-specific guarantee.
I went ahead and fixed our comments. https://github.com/riscv/riscv-linux/commit/38d727b99b9eb76fac533cebc23f89d364b7d60d > >> + * if you do not require the atomic guarantees. >> + * >> + * Note: there are no guarantees that this function will not be reordered >> + * on non x86 architectures, so if you are writing portable code, >> + * make sure not to rely on its reordering guarantees. >> + * >> + * Note that @nr may be almost arbitrarily large; this function is not >> + * restricted to acting on a single-word quantity. >> + */ >> +static inline void set_bit(int nr, volatile unsigned long *addr) >> +{ >> + __op_bit(or, __NOP, nr, addr); >> +} >> + >> +/** >> + * clear_bit - Clears a bit in memory >> + * @nr: Bit to clear >> + * @addr: Address to start counting from >> + * >> + * clear_bit() is atomic and may not be reordered. However, it does >> + * not contain a memory barrier, so if it is used for locking purposes, >> + * you should call smp_mb__before_clear_bit() and/or >> smp_mb__after_clear_bit() >> + * in order to ensure changes are visible on other processors. >> + */ >> +static inline void clear_bit(int nr, volatile unsigned long *addr) >> +{ >> + __op_bit(and, __NOT, nr, addr); >> +} >> + >> +/** >> + * change_bit - Toggle a bit in memory >> + * @nr: Bit to change >> + * @addr: Address to start counting from >> + * >> + * change_bit() is atomic and may not be reordered. It may be >> + * reordered on other architectures than x86. >> + * Note that @nr may be almost arbitrarily large; this function is not >> + * restricted to acting on a single-word quantity. >> + */ >> +static inline void change_bit(int nr, volatile unsigned long *addr) >> +{ >> + __op_bit(xor, __NOP, nr, addr); >> +} >> + >> +/** >> + * test_and_set_bit_lock - Set a bit and return its old value, for lock >> + * @nr: Bit to set >> + * @addr: Address to count from >> + * >> + * This operation is atomic and provides acquire barrier semantics. >> + * It can be used to implement bit locks. >> + */ >> +static inline int test_and_set_bit_lock( >> + unsigned long nr, volatile unsigned long *addr) >> +{ >> + return test_and_set_bit(nr, addr); > > If you want, you can open code an "amoor.aq" here, because > test_and_set_bit_lock() only needs an acquire barrier. > >> +} >> + >> +/** >> + * clear_bit_unlock - Clear a bit in memory, for unlock >> + * @nr: the bit to set >> + * @addr: the address to start counting from >> + * >> + * This operation is atomic and provides release barrier semantics. >> + */ >> +static inline void clear_bit_unlock( >> + unsigned long nr, volatile unsigned long *addr) >> +{ > > You need a smp_mb__before_atomic() here, because clear_bit() is only > relaxed atomic. And clear_bit_unlock() is a release. Makes sense. I went ahead and added the aq and rl bits to the AMOs already in these two: https://github.com/riscv/riscv-linux/commit/f3903a2b403522a8dafd5cbe850caa22755d6b5b