Re: [PATCH V3 11/27] csky: Atomic operations

Peter Zijlstra Wed, 12 Sep 2018 08:55:48 -0700

On Wed, Sep 12, 2018 at 09:24:45PM +0800, Guo Ren wrote:

> +#define ATOMIC_OP(op, c_op)                                          \
> +static inline void atomic_##op(int i, atomic_t *v)                   \
> +{                                                                    \
> +     unsigned long tmp;                                              \
> +                                                                     \
> +     smp_mb();                                                       \
> +     asm volatile (                                                  \
> +     "1:     ldex.w          %0, (%2) \n"                            \
> +     "       " #op "         %0, %1   \n"                            \
> +     "       stex.w          %0, (%2) \n"                            \
> +     "       bez             %0, 1b   \n"                            \
> +             : "=&r" (tmp)                                           \
> +             : "r" (i), "r"(&v->counter)                             \
> +             : "memory");                                            \
> +     smp_mb();                                                       \
> +}


ATOMIC_OP doesn't need to imply any smp_mb()s whatsoever.

> +#define ATOMIC_OP_RETURN(op, c_op)                                   \
> +static inline int atomic_##op##_return(int i, atomic_t *v)           \
> +{                                                                    \
> +     unsigned long tmp, ret;                                         \
> +                                                                     \
> +     smp_mb();                                                       \
> +     asm volatile (                                                  \
> +     "1:     ldex.w          %0, (%3) \n"                            \
> +     "       " #op "         %0, %2   \n"                            \
> +     "       mov             %1, %0   \n"                            \
> +     "       stex.w          %0, (%3) \n"                            \
> +     "       bez             %0, 1b   \n"                            \
> +             : "=&r" (tmp), "=&r" (ret)                              \
> +             : "r" (i), "r"(&v->counter)                             \
> +             : "memory");                                            \
> +     smp_mb();                                                       \
> +                                                                     \
> +     return ret;                                                     \
> +}
> +
> +#define ATOMIC_FETCH_OP(op, c_op)                                    \
> +static inline int atomic_fetch_##op(int i, atomic_t *v)              \
> +{                                                                    \
> +     unsigned long tmp, ret;                                         \
> +                                                                     \
> +     smp_mb();                                                       \
> +     asm volatile (                                                  \
> +     "1:     ldex.w          %0, (%3) \n"                            \
> +     "       mov             %1, %0   \n"                            \
> +     "       " #op "         %0, %2   \n"                            \
> +     "       stex.w          %0, (%3) \n"                            \
> +     "       bez             %0, 1b   \n"                            \
> +             : "=&r" (tmp), "=&r" (ret)                              \
> +             : "r" (i), "r"(&v->counter)                             \
> +             : "memory");                                            \
> +     smp_mb();                                                       \
> +                                                                     \
> +     return ret;                                                     \
> +}

For these you could generate _relaxed variants and not provide smp_mb()
inside them.

> +#else /* CONFIG_CPU_HAS_LDSTEX */
> +
> +#include <linux/irqflags.h>
> +

> +#define ATOMIC_OP(op, c_op)                                          \
> +static inline void atomic_##op(int i, atomic_t *v)                   \
> +{                                                                    \
> +     unsigned long tmp, flags;                                       \
> +                                                                     \
> +     raw_local_irq_save(flags);                                      \
> +                                                                     \
> +     asm volatile (                                                  \
> +     "       ldw             %0, (%2) \n"                            \
> +     "       " #op "         %0, %1   \n"                            \
> +     "       stw             %0, (%2) \n"                            \
> +             : "=&r" (tmp)                                           \
> +             : "r" (i), "r"(&v->counter)                             \
> +             : "memory");                                            \
> +                                                                     \
> +     raw_local_irq_restore(flags);                                   \
> +}

Is this really 'better' than the generic UP fallback implementation?





> diff --git a/arch/csky/include/asm/spinlock.h b/arch/csky/include/asm/spinlock.h
> new file mode 100644
> index 0000000..f1081bb
> --- /dev/null
> +++ b/arch/csky/include/asm/spinlock.h
> @@ -0,0 +1,286 @@
> +#ifndef __ASM_CSKY_SPINLOCK_H
> +#define __ASM_CSKY_SPINLOCK_H
> +
> +#include <linux/spinlock_types.h>
> +#include <asm/barrier.h>
> +
> +#ifdef CONFIG_QUEUED_RWLOCKS
> +
> +/*
> + * Ticket-based spin-locking.
> + */
> +static inline void arch_spin_lock(arch_spinlock_t *lock)
> +{
> +     arch_spinlock_t lockval;
> +     u32 ticket_next = 1 << TICKET_NEXT;
> +     u32 *p = &lock->lock;
> +     u32 tmp;
> +
> +     smp_mb();

spin_lock() doesn't need smp_mb() before.

> +     asm volatile (
> +             "1:     ldex.w          %0, (%2) \n"
> +             "       mov             %1, %0   \n"
> +             "       add             %0, %3   \n"
> +             "       stex.w          %0, (%2) \n"
> +             "       bez             %0, 1b   \n"
> +             : "=&r" (tmp), "=&r" (lockval)
> +             : "r"(p), "r"(ticket_next)
> +             : "cc");
> +
> +     while (lockval.tickets.next != lockval.tickets.owner) {
> +             lockval.tickets.owner = READ_ONCE(lock->tickets.owner);
> +     }
> +
> +     smp_mb();
> +}
> +
> +static inline int arch_spin_trylock(arch_spinlock_t *lock)
> +{
> +     u32 tmp, contended, res;
> +     u32 ticket_next = 1 << TICKET_NEXT;
> +     u32 *p = &lock->lock;
> +
> +     smp_mb();

Idem: spin_trylock() doesn't need smp_mb() before either.

> +     do {
> +             asm volatile (
> +             "       ldex.w          %0, (%3)   \n"
> +             "       movi            %2, 1      \n"
> +             "       rotli           %1, %0, 16 \n"
> +             "       cmpne           %1, %0     \n"
> +             "       bt              1f         \n"
> +             "       movi            %2, 0      \n"
> +             "       add             %0, %0, %4 \n"
> +             "       stex.w          %0, (%3)   \n"
> +             "1:                                \n"
> +             : "=&r" (res), "=&r" (tmp), "=&r" (contended)
> +             : "r"(p), "r"(ticket_next)
> +             : "cc");
> +     } while (!res);
> +
> +     if (!contended)
> +             smp_mb();
> +
> +     return !contended;
> +}
> +
> +static inline void arch_spin_unlock(arch_spinlock_t *lock)
> +{
> +     smp_mb();
> +     lock->tickets.owner++;
> +     smp_mb();

spin_unlock() doesn't need smp_mb() after.

> +}
> +
> +static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
> +{
> +     return lock.tickets.owner == lock.tickets.next;
> +}
> +
> +static inline int arch_spin_is_locked(arch_spinlock_t *lock)
> +{
> +     return !arch_spin_value_unlocked(READ_ONCE(*lock));
> +}
> +
> +static inline int arch_spin_is_contended(arch_spinlock_t *lock)
> +{
> +     struct __raw_tickets tickets = READ_ONCE(lock->tickets);
> +     return (tickets.next - tickets.owner) > 1;
> +}
> +#define arch_spin_is_contended       arch_spin_is_contended
> +
> +#include <asm/qrwlock.h>
> +
> +/* See include/linux/spinlock.h */
> +#define smp_mb__after_spinlock()     smp_mb()
> +
> +#else /* CONFIG_QUEUED_RWLOCKS */
> +
> +/*
> + * Test-and-set spin-locking.
> + */

Why retain the test-and-set implementation at all?

The same comments apply; it has far too many smp_mb()s in it.

> +#endif /* CONFIG_QUEUED_RWLOCKS */
> +#endif /* __ASM_CSKY_SPINLOCK_H */
> diff --git a/arch/csky/include/asm/spinlock_types.h b/arch/csky/include/asm/spinlock_types.h
> new file mode 100644
> index 0000000..7e825c2
> --- /dev/null
> +++ b/arch/csky/include/asm/spinlock_types.h
> @@ -0,0 +1,35 @@
> +#ifndef __ASM_CSKY_SPINLOCK_TYPES_H
> +#define __ASM_CSKY_SPINLOCK_TYPES_H
> +
> +#ifndef __LINUX_SPINLOCK_TYPES_H
> +# error "please don't include this file directly"
> +#endif
> +
> +#define TICKET_NEXT  16
> +
> +typedef struct {
> +     union {
> +             u32 lock;
> +             struct __raw_tickets {
> +                     /* little endian */
> +                     u16 owner;
> +                     u16 next;
> +             } tickets;
> +     };
> +} arch_spinlock_t;
> +
> +#define __ARCH_SPIN_LOCK_UNLOCKED    { { 0 } }
> +
> +#ifdef CONFIG_QUEUED_RWLOCKS
> +#include <asm-generic/qrwlock_types.h>
> +
> +#else /* CONFIG_QUEUED_RWLOCKS */
> +
> +typedef struct {
> +     u32 lock;
> +} arch_rwlock_t;
> +
> +#define __ARCH_RW_LOCK_UNLOCKED              { 0 }
> +
> +#endif /* CONFIG_QUEUED_RWLOCKS */
> +#endif /* __ASM_CSKY_SPINLOCK_TYPES_H */

Re: [PATCH V3 11/27] csky: Atomic operations

Reply via email to