Re: [PATCH v2 09/13] locking/qspinlock: Merge struct __qspinlock into struct qspinlock

2018-04-11 Thread Waiman Long
On 04/11/2018 02:01 PM, Will Deacon wrote:
> struct __qspinlock provides a handy union of fields so that
> subcomponents of the lockword can be accessed by name, without having to
> manage shifts and masks explicitly and take endianness into account.
>
> This is useful in qspinlock.h and also potentially in arch headers, so
> move the struct __qspinlock into struct qspinlock and kill the extra
> definition.
>
> Cc: Peter Zijlstra 
> Cc: Ingo Molnar 
> Acked-by: Boqun Feng 
> Signed-off-by: Will Deacon 

I think you are using the new qspinlock structure in some of your
earlier patches, such as patch 4, so you may want to move this patch
earlier in the series to avoid bisection problems.

Cheers,
Longman


[PATCH v2 09/13] locking/qspinlock: Merge struct __qspinlock into struct qspinlock

2018-04-11 Thread Will Deacon
struct __qspinlock provides a handy union of fields so that
subcomponents of the lockword can be accessed by name, without having to
manage shifts and masks explicitly and take endianness into account.

This is useful in qspinlock.h and also potentially in arch headers, so
move the struct __qspinlock into struct qspinlock and kill the extra
definition.

Cc: Peter Zijlstra 
Cc: Ingo Molnar 
Acked-by: Boqun Feng 
Signed-off-by: Will Deacon 
---
 arch/x86/include/asm/qspinlock.h          |  2 +-
 arch/x86/include/asm/qspinlock_paravirt.h |  3 +-
 include/asm-generic/qspinlock_types.h     | 32 +++--
 kernel/locking/qspinlock.c                | 46 ++-
 kernel/locking/qspinlock_paravirt.h       | 34 ---
 5 files changed, 46 insertions(+), 71 deletions(-)

diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h
index 2f09915f4aa4..da1370ad206d 100644
--- a/arch/x86/include/asm/qspinlock.h
+++ b/arch/x86/include/asm/qspinlock.h
@@ -18,7 +18,7 @@
  */
 static inline void native_queued_spin_unlock(struct qspinlock *lock)
 {
-   smp_store_release((u8 *)lock, 0);
+   smp_store_release(&lock->locked, 0);
 }
 
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h
index 923307ea11c7..9ef5ee03d2d7 100644
--- a/arch/x86/include/asm/qspinlock_paravirt.h
+++ b/arch/x86/include/asm/qspinlock_paravirt.h
@@ -22,8 +22,7 @@ PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
  *
  * void __pv_queued_spin_unlock(struct qspinlock *lock)
  * {
- * struct __qspinlock *l = (void *)lock;
- * u8 lockval = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
+ * u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0);
  *
  * if (likely(lockval == _Q_LOCKED_VAL))
  * return;
diff --git a/include/asm-generic/qspinlock_types.h b/include/asm-generic/qspinlock_types.h
index 034acd0c4956..0763f065b975 100644
--- a/include/asm-generic/qspinlock_types.h
+++ b/include/asm-generic/qspinlock_types.h
@@ -29,13 +29,41 @@
 #endif
 
 typedef struct qspinlock {
-   atomic_t val;
+   union {
+   atomic_t val;
+
+   /*
+* By using the whole 2nd least significant byte for the
+* pending bit, we can allow better optimization of the lock
+* acquisition for the pending bit holder.
+*/
+#ifdef __LITTLE_ENDIAN
+   struct {
+   u8  locked;
+   u8  pending;
+   };
+   struct {
+   u16 locked_pending;
+   u16 tail;
+   };
+#else
+   struct {
+   u16 tail;
+   u16 locked_pending;
+   };
+   struct {
+   u8  reserved[2];
+   u8  pending;
+   u8  locked;
+   };
+#endif
+   };
 } arch_spinlock_t;
 
 /*
  * Initializier
  */
-#define __ARCH_SPIN_LOCK_UNLOCKED   { ATOMIC_INIT(0) }
+#define __ARCH_SPIN_LOCK_UNLOCKED   { .val = ATOMIC_INIT(0) }
 
 /*
  * Bitfields in the atomic value:
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index c781ddbe59a6..7b8c81ebb15e 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -126,40 +126,6 @@ static inline __pure struct mcs_spinlock *decode_tail(u32 tail)
 
 #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
 
-/*
- * By using the whole 2nd least significant byte for the pending bit, we
- * can allow better optimization of the lock acquisition for the pending
- * bit holder.
- *
- * This internal structure is also used by the set_locked function which
- * is not restricted to _Q_PENDING_BITS == 8.
- */
-struct __qspinlock {
-   union {
-   atomic_t val;
-#ifdef __LITTLE_ENDIAN
-   struct {
-   u8  locked;
-   u8  pending;
-   };
-   struct {
-   u16 locked_pending;
-   u16 tail;
-   };
-#else
-   struct {
-   u16 tail;
-   u16 locked_pending;
-   };
-   struct {
-   u8  reserved[2];
-   u8  pending;
-   u8  locked;
-   };
-#endif
-   };
-};
-
 #if _Q_PENDING_BITS == 8
 /**
  * clear_pending - clear the pending bit.
@@ -182,9 +148,7 @@ static __always_inline void clear_pending(struct qspinlock *lock)
  */
 static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
 {
-   struct __qspinlock *l = (void *)lock;
-
-   WRITE_ONCE(l->locked_pending, _Q_LOCKED_VAL);
+   WRITE_ONCE(lock->locked_pending, _Q_LOCKED_VAL);
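
For readers following the change outside the kernel tree, here is a minimal
userspace sketch of the idea in the commit message: a union lets the locked
byte, the pending byte and the locked_pending/tail halfwords be read and
written by name, where shift-and-mask code would otherwise have to pick the
fields out of the atomic value. The struct and helper names below are made up
for the illustration, only the little-endian field layout mirrors the patch,
and plain stores stand in for atomic_t, WRITE_ONCE() and smp_store_release().

/*
 * Minimal userspace model of the merged lockword layout, for illustration
 * only.  Everything here is invented for the demo: plain uint32_t stands in
 * for atomic_t, ordinary stores stand in for WRITE_ONCE()/smp_store_release(),
 * and only the little-endian arm of the patch is spelled out.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
#error "this sketch only models the little-endian layout"
#endif

#define _Q_LOCKED_VAL 1U	/* value of the locked byte when held */

struct qspinlock_model {
	union {
		uint32_t val;			/* whole lockword (atomic_t in the kernel) */
		struct {			/* byte view */
			uint8_t locked;		/* bits  0-7  */
			uint8_t pending;	/* bits  8-15 */
		};
		struct {			/* halfword view */
			uint16_t locked_pending; /* bits  0-15 */
			uint16_t tail;		 /* bits 16-31 */
		};
	};
};

/* Without named fields: clear pending + set locked by masking the word. */
static void clear_pending_set_locked_masked(struct qspinlock_model *lock)
{
	lock->val = (lock->val & ~0xffffU) | _Q_LOCKED_VAL;
}

/* With the union: the same transition is one store to a named halfword. */
static void clear_pending_set_locked_named(struct qspinlock_model *lock)
{
	lock->locked_pending = _Q_LOCKED_VAL;
}

int main(void)
{
	struct qspinlock_model a = { .val = 0 }, b;

	/* A lockword with the pending bit set and a tail encoded. */
	a.pending = 1;
	a.tail = 0x0042;
	b = a;

	clear_pending_set_locked_masked(&a);
	clear_pending_set_locked_named(&b);
	assert(a.val == b.val);	/* both paths produce the same lockword */

	printf("val=0x%08x locked=%u pending=%u tail=0x%04x\n",
	       (unsigned)b.val, (unsigned)b.locked,
	       (unsigned)b.pending, (unsigned)b.tail);
	return 0;
}

The named halfword is what the _Q_PENDING_BITS == 8 fast path relies on: with
pending occupying a whole byte, the handover from the pending holder to the
locked state is a single 16-bit store to locked_pending, as in the
clear_pending_set_locked() hunk above, rather than an atomic read-modify-write
of the full lockword.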