On Tue, Oct 16, 2018 at 10:58:30AM +0800, Guo Ren wrote:

> diff --git a/arch/csky/include/asm/spinlock.h b/arch/csky/include/asm/spinlock.h
> new file mode 100644
> index 0000000..0474603
> --- /dev/null
> +++ b/arch/csky/include/asm/spinlock.h
> @@ -0,0 +1,274 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifndef __ASM_CSKY_SPINLOCK_H
> +#define __ASM_CSKY_SPINLOCK_H
> +
> +#include <linux/spinlock_types.h>
> +#include <asm/barrier.h>
> +
> +#ifdef CONFIG_QUEUED_RWLOCKS
> +
> +/*
> + * Ticket-based spin-locking.
> + */
> +static inline void arch_spin_lock(arch_spinlock_t *lock)
> +{
> +     arch_spinlock_t lockval;
> +     u32 ticket_next = 1 << TICKET_NEXT;
> +     u32 *p = &lock->lock;
> +     u32 tmp;
> +
> +     asm volatile (
> +             "1:     ldex.w          %0, (%2) \n"
> +             "       mov             %1, %0   \n"
> +             "       add             %0, %3   \n"
> +             "       stex.w          %0, (%2) \n"
> +             "       bez             %0, 1b   \n"
> +             : "=&r" (tmp), "=&r" (lockval)
> +             : "r"(p), "r"(ticket_next)
> +             : "cc");
> +
> +     while (lockval.tickets.next != lockval.tickets.owner)
> +             lockval.tickets.owner = READ_ONCE(lock->tickets.owner);
> +
> +     smp_mb();
> +}
> +
> +static inline int arch_spin_trylock(arch_spinlock_t *lock)
> +{
> +     u32 tmp, contended, res;
> +     u32 ticket_next = 1 << TICKET_NEXT;
> +     u32 *p = &lock->lock;
> +
> +     do {
> +             asm volatile (
> +             "       ldex.w          %0, (%3)   \n"
> +             "       movi            %2, 1      \n"
> +             "       rotli           %1, %0, 16 \n"
> +             "       cmpne           %1, %0     \n"
> +             "       bt              1f         \n"
> +             "       movi            %2, 0      \n"
> +             "       add             %0, %0, %4 \n"
> +             "       stex.w          %0, (%3)   \n"
> +             "1:                                \n"
> +             : "=&r" (res), "=&r" (tmp), "=&r" (contended)
> +             : "r"(p), "r"(ticket_next)
> +             : "cc");
> +     } while (!res);
> +
> +     if (!contended)
> +             smp_mb();
> +
> +     return !contended;
> +}
> +
> +static inline void arch_spin_unlock(arch_spinlock_t *lock)
> +{
> +     smp_mb();
> +     lock->tickets.owner++;

        WRITE_ONCE(lock->tickets.owner, lock->tickets.owner + 1);

> +}
> +
> +static inline int arch_spin_value_unlocked(arch_spinlock_t lock)
> +{
> +     return lock.tickets.owner == lock.tickets.next;
> +}
> +
> +static inline int arch_spin_is_locked(arch_spinlock_t *lock)
> +{
> +     return !arch_spin_value_unlocked(READ_ONCE(*lock));
> +}
> +
> +static inline int arch_spin_is_contended(arch_spinlock_t *lock)
> +{
> +     struct __raw_tickets tickets = READ_ONCE(lock->tickets);
> +
> +     return (tickets.next - tickets.owner) > 1;
> +}
> +#define arch_spin_is_contended       arch_spin_is_contended
> +
> +#include <asm/qrwlock.h>
> +
> +/* See include/linux/spinlock.h */
> +#define smp_mb__after_spinlock()     smp_mb()
> +
> +#else /* CONFIG_QUEUED_RWLOCKS */
> +
> +/*
> + * Test-and-set spin-locking.
> + */

I'm still not entirely sure why you want to have two spinlock
implementations; to me that is just extra maintenance overhead.

> +static inline void arch_spin_lock(arch_spinlock_t *lock)
> +{
> +     u32 *p = &lock->lock;
> +     u32 tmp;
> +
> +     asm volatile (
> +             "1:     ldex.w          %0, (%1) \n"
> +             "       bnez            %0, 1b   \n"
> +             "       movi            %0, 1    \n"
> +             "       stex.w          %0, (%1) \n"
> +             "       bez             %0, 1b   \n"
> +             : "=&r" (tmp)
> +             : "r"(p)
> +             : "cc");
> +     smp_mb();
> +}
> +
> +static inline void arch_spin_unlock(arch_spinlock_t *lock)
> +{
> +     u32 *p = &lock->lock;
> +     u32 tmp;
> +
> +     smp_mb();
> +     asm volatile (
> +             "       movi            %0, 0    \n"
> +             "       stw             %0, (%1) \n"
> +             : "=&r" (tmp)
> +             : "r"(p)
> +             : "cc");

        WRITE_ONCE(lock->lock, 0);
?

> +}
> +
> +static inline int arch_spin_trylock(arch_spinlock_t *lock)
> +{
> +     u32 *p = &lock->lock;
> +     u32 tmp;
> +
> +     asm volatile (
> +             "1:     ldex.w          %0, (%1) \n"
> +             "       bnez            %0, 2f   \n"
> +             "       movi            %0, 1    \n"
> +             "       stex.w          %0, (%1) \n"
> +             "       bez             %0, 1b   \n"
> +             "       movi            %0, 0    \n"
> +             "2:                              \n"
> +             : "=&r" (tmp)
> +             : "r"(p)
> +             : "cc");
> +
> +     if (!tmp)
> +             smp_mb();
> +
> +     return !tmp;
> +}
> +
> +#define arch_spin_is_locked(x)       (READ_ONCE((x)->lock) != 0)
> +
> +/*
> + * read lock/unlock/trylock
> + */

Idem, why do you want a second rwlock_t implementation?

> +/*
> + * write lock/unlock/trylock
> + */
> +static inline void arch_write_lock(arch_rwlock_t *lock)
> +{
> +     u32 *p = &lock->lock;
> +     u32 tmp;
> +
> +     asm volatile (
> +             "1:     ldex.w          %0, (%1) \n"
> +             "       bnez            %0, 1b   \n"
> +             "       subi            %0, 1    \n"
> +             "       stex.w          %0, (%1) \n"
> +             "       bez             %0, 1b   \n"
> +             : "=&r" (tmp)
> +             : "r"(p)
> +             : "cc");
> +     smp_mb();
> +}
> +
> +static inline void arch_write_unlock(arch_rwlock_t *lock)
> +{
> +     u32 *p = &lock->lock;
> +     u32 tmp;
> +
> +     smp_mb();
> +     asm volatile (
> +             "1:     ldex.w          %0, (%1) \n"
> +             "       movi            %0, 0    \n"
> +             "       stex.w          %0, (%1) \n"
> +             "       bez             %0, 1b   \n"
> +             : "=&r" (tmp)
> +             : "r"(p)
> +             : "cc");

Isn't that:

        WRITE_ONCE(lock->lock, 0);

> +}
> +
> +static inline int arch_write_trylock(arch_rwlock_t *lock)
> +{
> +     u32 *p = &lock->lock;
> +     u32 tmp;
> +
> +     asm volatile (
> +             "1:     ldex.w          %0, (%1) \n"
> +             "       bnez            %0, 2f   \n"
> +             "       subi            %0, 1    \n"
> +             "       stex.w          %0, (%1) \n"
> +             "       bez             %0, 1b   \n"
> +             "       movi            %0, 0    \n"
> +             "2:                              \n"
> +             : "=&r" (tmp)
> +             : "r"(p)
> +             : "cc");
> +
> +     if (!tmp)
> +             smp_mb();
> +
> +     return !tmp;
> +}

> diff --git a/arch/csky/kernel/atomic.S b/arch/csky/kernel/atomic.S
> new file mode 100644
> index 0000000..d2357c8
> --- /dev/null
> +++ b/arch/csky/kernel/atomic.S
> @@ -0,0 +1,87 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
> +
> +#include <linux/linkage.h>
> +#include <abi/entry.h>
> +
> +.text
> +
> +/*
> + * int csky_cmpxchg(int oldval, int newval, int *ptr)
> + *
> + * If *ptr != oldval && return 1,
> + * else *ptr = newval return 0.
> + */
> +#ifdef CONFIG_CPU_HAS_LDSTEX
> +ENTRY(csky_cmpxchg)
> +     USPTOKSP
> +     mfcr    a3, epc
> +     INCTRAP a3
> +
> +     subi    sp, 8
> +     stw     a3, (sp, 0)
> +     mfcr    a3, epsr
> +     stw     a3, (sp, 4)
> +
> +     psrset  ee
> +1:
> +     ldex    a3, (a2)
> +     cmpne   a0, a3
> +     bt16    2f
> +     mov     a3, a1
> +     stex    a3, (a2)
> +     bez     a3, 1b
> +2:
> +     sync.is
> +     mvc     a0
> +     ldw     a3, (sp, 0)
> +     mtcr    a3, epc
> +     ldw     a3, (sp, 4)
> +     mtcr    a3, epsr
> +     addi    sp, 8
> +     KSPTOUSP
> +     rte
> +END(csky_cmpxchg)

I don't understand why you have this; if the CPU has ll/sc, why do you
need syscall support?

In any case, nothing is terminally broken, so I suppose that's good enough
for starters. I just really don't understand some of the decisions (like
having two lock implementations, and having that cmpxchg syscall when you
have hardware ll/sc).

Acked-by: Peter Zijlstra (Intel) <[email protected]>

Reply via email to