Browse Source

Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking changes from Ingo Molnar:
 "The main changes in this cycle were:

   - More gradual enhancements to atomic ops: new atomic*_read_ctrl()
     ops, synchronize atomic_{read,set}() ordering requirements between
     architectures, add atomic_long_t bitops.  (Peter Zijlstra)

   - Add _{relaxed|acquire|release}() variants for inc/dec atomics and
     use them in various locking primitives: mutex, rtmutex, mcs, rwsem.
     This enables weakly ordered architectures (such as arm64) to make
     use of more locking related optimizations.  (Davidlohr Bueso)

   - Implement atomic[64]_{inc,dec}_relaxed() on ARM.  (Will Deacon)

   - Futex kernel data cache footprint micro-optimization.  (Rasmus
     Villemoes)

   - pvqspinlock runtime overhead micro-optimization.  (Waiman Long)

   - misc smaller fixlets"

* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  ARM, locking/atomics: Implement _relaxed variants of atomic[64]_{inc,dec}
  locking/rwsem: Use acquire/release semantics
  locking/mcs: Use acquire/release semantics
  locking/rtmutex: Use acquire/release semantics
  locking/mutex: Use acquire/release semantics
  locking/asm-generic: Add _{relaxed|acquire|release}() variants for inc/dec atomics
  atomic: Implement atomic_read_ctrl()
  atomic, arch: Audit atomic_{read,set}()
  atomic: Add atomic_long_t bitops
  futex: Force hot variables into a single cache line
  locking/pvqspinlock: Kick the PV CPU unconditionally when _Q_SLOW_VAL
  locking/osq: Relax atomic semantics
  locking/qrwlock: Rename ->lock to ->wait_lock
  locking/Documentation/lockstat: Fix typo - lokcing -> locking
  locking/atomics, cmpxchg: Privatize the inclusion of asm/cmpxchg.h
Linus Torvalds 10 years ago
parent
commit
d63a978865
44 changed files with 304 additions and 143 deletions
  1. 4 0
      Documentation/atomic_ops.txt
  2. 1 1
      Documentation/locking/lockstat.txt
  3. 9 8
      Documentation/memory-barriers.txt
  4. 4 4
      arch/alpha/include/asm/atomic.h
  5. 4 4
      arch/arc/include/asm/atomic.h
  6. 6 6
      arch/arm/include/asm/atomic.h
  7. 1 1
      arch/arm64/include/asm/atomic.h
  8. 2 2
      arch/avr32/include/asm/atomic.h
  9. 2 2
      arch/frv/include/asm/atomic.h
  10. 2 2
      arch/h8300/include/asm/atomic.h
  11. 1 1
      arch/hexagon/include/asm/atomic.h
  12. 4 4
      arch/ia64/include/asm/atomic.h
  13. 2 2
      arch/m32r/include/asm/atomic.h
  14. 2 2
      arch/m68k/include/asm/atomic.h
  15. 1 1
      arch/metag/include/asm/atomic_lnkget.h
  16. 1 1
      arch/metag/include/asm/atomic_lock1.h
  17. 4 4
      arch/mips/include/asm/atomic.h
  18. 2 2
      arch/mn10300/include/asm/atomic.h
  19. 1 1
      arch/parisc/include/asm/atomic.h
  20. 2 2
      arch/sh/include/asm/atomic.h
  21. 4 4
      arch/sparc/include/asm/atomic_64.h
  22. 1 1
      arch/tile/include/asm/atomic.h
  23. 3 3
      arch/tile/include/asm/atomic_64.h
  24. 2 2
      arch/x86/include/asm/atomic.h
  25. 2 2
      arch/x86/include/asm/atomic64_64.h
  26. 2 2
      arch/xtensa/include/asm/atomic.h
  27. 1 1
      drivers/net/ethernet/sfc/mcdi.c
  28. 1 2
      drivers/phy/phy-rcar-gen2.c
  29. 1 1
      drivers/staging/speakup/selection.c
  30. 33 23
      include/asm-generic/atomic-long.h
  31. 2 2
      include/asm-generic/atomic.h
  32. 4 4
      include/asm-generic/mutex-dec.h
  33. 5 5
      include/asm-generic/mutex-xchg.h
  34. 2 2
      include/asm-generic/qrwlock_types.h
  35. 14 7
      include/asm-generic/rwsem.h
  36. 117 1
      include/linux/atomic.h
  37. 11 2
      kernel/futex.c
  38. 2 2
      kernel/locking/mcs_spinlock.h
  39. 5 4
      kernel/locking/mutex.c
  40. 8 3
      kernel/locking/osq_lock.c
  41. 4 4
      kernel/locking/qrwlock.c
  42. 1 5
      kernel/locking/qspinlock_paravirt.h
  43. 21 9
      kernel/locking/rtmutex.c
  44. 3 2
      kernel/locking/rwsem-xadd.c

+ 4 - 0
Documentation/atomic_ops.txt

@@ -542,6 +542,10 @@ The routines xchg() and cmpxchg() must provide the same exact
 memory-barrier semantics as the atomic and bit operations returning
 values.
 
+Note: If someone wants to use xchg(), cmpxchg() and their variants,
+linux/atomic.h should be included rather than asm/cmpxchg.h, unless
+the code is in arch/* and can take care of itself.
+
 Spinlocks and rwlocks have memory barrier expectations as well.
 The rule to follow is simple:
 

+ 1 - 1
Documentation/locking/lockstat.txt

@@ -12,7 +12,7 @@ Because things like lock contention can severely impact performance.
 - HOW
 
 Lockdep already has hooks in the lock functions and maps lock instances to
-lock classes. We build on that (see Documentation/lokcing/lockdep-design.txt).
+lock classes. We build on that (see Documentation/locking/lockdep-design.txt).
 The graph below shows the relation between the lock functions and the various
 hooks therein.
 

+ 9 - 8
Documentation/memory-barriers.txt

@@ -637,7 +637,8 @@ as follows:
 	b = p;  /* BUG: Compiler and CPU can both reorder!!! */
 
 Finally, the READ_ONCE_CTRL() includes an smp_read_barrier_depends()
-that DEC Alpha needs in order to respect control depedencies.
+that DEC Alpha needs in order to respect control depedencies. Alternatively
+use one of atomic{,64}_read_ctrl().
 
 So don't leave out the READ_ONCE_CTRL().
 
@@ -796,9 +797,9 @@ site: https://www.cl.cam.ac.uk/~pes20/ppcmem/index.html.
 
 In summary:
 
-  (*) Control dependencies must be headed by READ_ONCE_CTRL().
-      Or, as a much less preferable alternative, interpose
-      smp_read_barrier_depends() between a READ_ONCE() and the
+  (*) Control dependencies must be headed by READ_ONCE_CTRL(),
+      atomic{,64}_read_ctrl(). Or, as a much less preferable alternative,
+      interpose smp_read_barrier_depends() between a READ_ONCE() and the
       control-dependent write.
 
   (*) Control dependencies can order prior loads against later stores.
@@ -820,10 +821,10 @@ In summary:
       and WRITE_ONCE() can help to preserve the needed conditional.
 
   (*) Control dependencies require that the compiler avoid reordering the
-      dependency into nonexistence.  Careful use of READ_ONCE_CTRL()
-      or smp_read_barrier_depends() can help to preserve your control
-      dependency.  Please see the Compiler Barrier section for more
-      information.
+      dependency into nonexistence.  Careful use of READ_ONCE_CTRL(),
+      atomic{,64}_read_ctrl() or smp_read_barrier_depends() can help to
+      preserve your control dependency.  Please see the Compiler Barrier
+      section for more information.
 
   (*) Control dependencies pair normally with other types of barriers.
 

+ 4 - 4
arch/alpha/include/asm/atomic.h

@@ -17,11 +17,11 @@
 #define ATOMIC_INIT(i)		{ (i) }
 #define ATOMIC64_INIT(i)	{ (i) }
 
-#define atomic_read(v)		ACCESS_ONCE((v)->counter)
-#define atomic64_read(v)	ACCESS_ONCE((v)->counter)
+#define atomic_read(v)		READ_ONCE((v)->counter)
+#define atomic64_read(v)	READ_ONCE((v)->counter)
 
-#define atomic_set(v,i)		((v)->counter = (i))
-#define atomic64_set(v,i)	((v)->counter = (i))
+#define atomic_set(v,i)		WRITE_ONCE((v)->counter, (i))
+#define atomic64_set(v,i)	WRITE_ONCE((v)->counter, (i))
 
 /*
  * To get proper branch prediction for the main line, we must branch

+ 4 - 4
arch/arc/include/asm/atomic.h

@@ -17,11 +17,11 @@
 #include <asm/barrier.h>
 #include <asm/smp.h>
 
-#define atomic_read(v)  ((v)->counter)
+#define atomic_read(v)  READ_ONCE((v)->counter)
 
 #ifdef CONFIG_ARC_HAS_LLSC
 
-#define atomic_set(v, i) (((v)->counter) = (i))
+#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
 
 #ifdef CONFIG_ARC_STAR_9000923308
 
@@ -107,7 +107,7 @@ static inline int atomic_##op##_return(int i, atomic_t *v)		\
 #ifndef CONFIG_SMP
 
  /* violating atomic_xxx API locking protocol in UP for optimization sake */
-#define atomic_set(v, i) (((v)->counter) = (i))
+#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
 
 #else
 
@@ -125,7 +125,7 @@ static inline void atomic_set(atomic_t *v, int i)
 	unsigned long flags;
 
 	atomic_ops_lock(flags);
-	v->counter = i;
+	WRITE_ONCE(v->counter, i);
 	atomic_ops_unlock(flags);
 }
 

+ 6 - 6
arch/arm/include/asm/atomic.h

@@ -27,8 +27,8 @@
  * strex/ldrex monitor on some implementations. The reason we can use it for
  * atomic_set() is the clrex or dummy strex done on every exception return.
  */
-#define atomic_read(v)	ACCESS_ONCE((v)->counter)
-#define atomic_set(v,i)	(((v)->counter) = (i))
+#define atomic_read(v)	READ_ONCE((v)->counter)
+#define atomic_set(v,i)	WRITE_ONCE(((v)->counter), (i))
 
 #if __LINUX_ARM_ARCH__ >= 6
 
@@ -210,8 +210,8 @@ ATOMIC_OP(xor, ^=, eor)
 
 #define atomic_inc_and_test(v)	(atomic_add_return(1, v) == 0)
 #define atomic_dec_and_test(v)	(atomic_sub_return(1, v) == 0)
-#define atomic_inc_return(v)    (atomic_add_return(1, v))
-#define atomic_dec_return(v)    (atomic_sub_return(1, v))
+#define atomic_inc_return_relaxed(v)    (atomic_add_return_relaxed(1, v))
+#define atomic_dec_return_relaxed(v)    (atomic_sub_return_relaxed(1, v))
 #define atomic_sub_and_test(i, v) (atomic_sub_return(i, v) == 0)
 
 #define atomic_add_negative(i,v) (atomic_add_return(i, v) < 0)
@@ -442,11 +442,11 @@ static inline int atomic64_add_unless(atomic64_t *v, long long a, long long u)
 
 #define atomic64_add_negative(a, v)	(atomic64_add_return((a), (v)) < 0)
 #define atomic64_inc(v)			atomic64_add(1LL, (v))
-#define atomic64_inc_return(v)		atomic64_add_return(1LL, (v))
+#define atomic64_inc_return_relaxed(v)	atomic64_add_return_relaxed(1LL, (v))
 #define atomic64_inc_and_test(v)	(atomic64_inc_return(v) == 0)
 #define atomic64_sub_and_test(a, v)	(atomic64_sub_return((a), (v)) == 0)
 #define atomic64_dec(v)			atomic64_sub(1LL, (v))
-#define atomic64_dec_return(v)		atomic64_sub_return(1LL, (v))
+#define atomic64_dec_return_relaxed(v)	atomic64_sub_return_relaxed(1LL, (v))
 #define atomic64_dec_and_test(v)	(atomic64_dec_return((v)) == 0)
 #define atomic64_inc_not_zero(v)	atomic64_add_unless((v), 1LL, 0LL)
 

+ 1 - 1
arch/arm64/include/asm/atomic.h

@@ -54,7 +54,7 @@
 #define ATOMIC_INIT(i)	{ (i) }
 
 #define atomic_read(v)			READ_ONCE((v)->counter)
-#define atomic_set(v, i)		(((v)->counter) = (i))
+#define atomic_set(v, i)		WRITE_ONCE(((v)->counter), (i))
 #define atomic_xchg(v, new)		xchg(&((v)->counter), (new))
 #define atomic_cmpxchg(v, old, new)	cmpxchg(&((v)->counter), (old), (new))
 

+ 2 - 2
arch/avr32/include/asm/atomic.h

@@ -19,8 +19,8 @@
 
 #define ATOMIC_INIT(i)  { (i) }
 
-#define atomic_read(v)		ACCESS_ONCE((v)->counter)
-#define atomic_set(v, i)	(((v)->counter) = i)
+#define atomic_read(v)		READ_ONCE((v)->counter)
+#define atomic_set(v, i)	WRITE_ONCE(((v)->counter), (i))
 
 #define ATOMIC_OP_RETURN(op, asm_op, asm_con)				\
 static inline int __atomic_##op##_return(int i, atomic_t *v)		\

+ 2 - 2
arch/frv/include/asm/atomic.h

@@ -32,8 +32,8 @@
  */
 
 #define ATOMIC_INIT(i)		{ (i) }
-#define atomic_read(v)		ACCESS_ONCE((v)->counter)
-#define atomic_set(v, i)	(((v)->counter) = (i))
+#define atomic_read(v)		READ_ONCE((v)->counter)
+#define atomic_set(v, i)	WRITE_ONCE(((v)->counter), (i))
 
 static inline int atomic_inc_return(atomic_t *v)
 {

+ 2 - 2
arch/h8300/include/asm/atomic.h

@@ -11,8 +11,8 @@
 
 #define ATOMIC_INIT(i)	{ (i) }
 
-#define atomic_read(v)		ACCESS_ONCE((v)->counter)
-#define atomic_set(v, i)	(((v)->counter) = i)
+#define atomic_read(v)		READ_ONCE((v)->counter)
+#define atomic_set(v, i)	WRITE_ONCE(((v)->counter), (i))
 
 #include <linux/kernel.h>
 

+ 1 - 1
arch/hexagon/include/asm/atomic.h

@@ -48,7 +48,7 @@ static inline void atomic_set(atomic_t *v, int new)
  *
  * Assumes all word reads on our architecture are atomic.
  */
-#define atomic_read(v)		((v)->counter)
+#define atomic_read(v)		READ_ONCE((v)->counter)
 
 /**
  * atomic_xchg - atomic

+ 4 - 4
arch/ia64/include/asm/atomic.h

@@ -21,11 +21,11 @@
 #define ATOMIC_INIT(i)		{ (i) }
 #define ATOMIC64_INIT(i)	{ (i) }
 
-#define atomic_read(v)		ACCESS_ONCE((v)->counter)
-#define atomic64_read(v)	ACCESS_ONCE((v)->counter)
+#define atomic_read(v)		READ_ONCE((v)->counter)
+#define atomic64_read(v)	READ_ONCE((v)->counter)
 
-#define atomic_set(v,i)		(((v)->counter) = (i))
-#define atomic64_set(v,i)	(((v)->counter) = (i))
+#define atomic_set(v,i)		WRITE_ONCE(((v)->counter), (i))
+#define atomic64_set(v,i)	WRITE_ONCE(((v)->counter), (i))
 
 #define ATOMIC_OP(op, c_op)						\
 static __inline__ int							\

+ 2 - 2
arch/m32r/include/asm/atomic.h

@@ -28,7 +28,7 @@
  *
  * Atomically reads the value of @v.
  */
-#define atomic_read(v)	ACCESS_ONCE((v)->counter)
+#define atomic_read(v)	READ_ONCE((v)->counter)
 
 /**
  * atomic_set - set atomic variable
@@ -37,7 +37,7 @@
  *
  * Atomically sets the value of @v to @i.
  */
-#define atomic_set(v,i)	(((v)->counter) = (i))
+#define atomic_set(v,i)	WRITE_ONCE(((v)->counter), (i))
 
 #ifdef CONFIG_CHIP_M32700_TS1
 #define __ATOMIC_CLOBBER	, "r4"

+ 2 - 2
arch/m68k/include/asm/atomic.h

@@ -17,8 +17,8 @@
 
 #define ATOMIC_INIT(i)	{ (i) }
 
-#define atomic_read(v)		ACCESS_ONCE((v)->counter)
-#define atomic_set(v, i)	(((v)->counter) = i)
+#define atomic_read(v)		READ_ONCE((v)->counter)
+#define atomic_set(v, i)	WRITE_ONCE(((v)->counter), (i))
 
 /*
  * The ColdFire parts cannot do some immediate to memory operations,

+ 1 - 1
arch/metag/include/asm/atomic_lnkget.h

@@ -3,7 +3,7 @@
 
 #define ATOMIC_INIT(i)	{ (i) }
 
-#define atomic_set(v, i)		((v)->counter = (i))
+#define atomic_set(v, i)		WRITE_ONCE((v)->counter, (i))
 
 #include <linux/compiler.h>
 

+ 1 - 1
arch/metag/include/asm/atomic_lock1.h

@@ -10,7 +10,7 @@
 
 static inline int atomic_read(const atomic_t *v)
 {
-	return (v)->counter;
+	return READ_ONCE((v)->counter);
 }
 
 /*

+ 4 - 4
arch/mips/include/asm/atomic.h

@@ -30,7 +30,7 @@
  *
  * Atomically reads the value of @v.
  */
-#define atomic_read(v)		ACCESS_ONCE((v)->counter)
+#define atomic_read(v)		READ_ONCE((v)->counter)
 
 /*
  * atomic_set - set atomic variable
@@ -39,7 +39,7 @@
  *
  * Atomically sets the value of @v to @i.
  */
-#define atomic_set(v, i)		((v)->counter = (i))
+#define atomic_set(v, i)	WRITE_ONCE((v)->counter, (i))
 
 #define ATOMIC_OP(op, c_op, asm_op)					      \
 static __inline__ void atomic_##op(int i, atomic_t * v)			      \
@@ -315,14 +315,14 @@ static __inline__ int __atomic_add_unless(atomic_t *v, int a, int u)
  * @v: pointer of type atomic64_t
  *
  */
-#define atomic64_read(v)	ACCESS_ONCE((v)->counter)
+#define atomic64_read(v)	READ_ONCE((v)->counter)
 
 /*
  * atomic64_set - set atomic variable
  * @v: pointer of type atomic64_t
  * @i: required value
  */
-#define atomic64_set(v, i)	((v)->counter = (i))
+#define atomic64_set(v, i)	WRITE_ONCE((v)->counter, (i))
 
 #define ATOMIC64_OP(op, c_op, asm_op)					      \
 static __inline__ void atomic64_##op(long i, atomic64_t * v)		      \

+ 2 - 2
arch/mn10300/include/asm/atomic.h

@@ -34,7 +34,7 @@
  *
  * Atomically reads the value of @v.  Note that the guaranteed
  */
-#define atomic_read(v)	(ACCESS_ONCE((v)->counter))
+#define atomic_read(v)	READ_ONCE((v)->counter)
 
 /**
  * atomic_set - set atomic variable
@@ -43,7 +43,7 @@
  *
  * Atomically sets the value of @v to @i.  Note that the guaranteed
  */
-#define atomic_set(v, i) (((v)->counter) = (i))
+#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
 
 #define ATOMIC_OP(op)							\
 static inline void atomic_##op(int i, atomic_t *v)			\

+ 1 - 1
arch/parisc/include/asm/atomic.h

@@ -67,7 +67,7 @@ static __inline__ void atomic_set(atomic_t *v, int i)
 
 static __inline__ int atomic_read(const atomic_t *v)
 {
-	return ACCESS_ONCE((v)->counter);
+	return READ_ONCE((v)->counter);
 }
 
 /* exported interface */

+ 2 - 2
arch/sh/include/asm/atomic.h

@@ -14,8 +14,8 @@
 
 #define ATOMIC_INIT(i)	{ (i) }
 
-#define atomic_read(v)		ACCESS_ONCE((v)->counter)
-#define atomic_set(v,i)		((v)->counter = (i))
+#define atomic_read(v)		READ_ONCE((v)->counter)
+#define atomic_set(v,i)		WRITE_ONCE((v)->counter, (i))
 
 #if defined(CONFIG_GUSA_RB)
 #include <asm/atomic-grb.h>

+ 4 - 4
arch/sparc/include/asm/atomic_64.h

@@ -14,11 +14,11 @@
 #define ATOMIC_INIT(i)		{ (i) }
 #define ATOMIC64_INIT(i)	{ (i) }
 
-#define atomic_read(v)		ACCESS_ONCE((v)->counter)
-#define atomic64_read(v)	ACCESS_ONCE((v)->counter)
+#define atomic_read(v)		READ_ONCE((v)->counter)
+#define atomic64_read(v)	READ_ONCE((v)->counter)
 
-#define atomic_set(v, i)	(((v)->counter) = i)
-#define atomic64_set(v, i)	(((v)->counter) = i)
+#define atomic_set(v, i)	WRITE_ONCE(((v)->counter), (i))
+#define atomic64_set(v, i)	WRITE_ONCE(((v)->counter), (i))
 
 #define ATOMIC_OP(op)							\
 void atomic_##op(int, atomic_t *);					\

+ 1 - 1
arch/tile/include/asm/atomic.h

@@ -34,7 +34,7 @@
  */
 static inline int atomic_read(const atomic_t *v)
 {
-	return ACCESS_ONCE(v->counter);
+	return READ_ONCE(v->counter);
 }
 
 /**

+ 3 - 3
arch/tile/include/asm/atomic_64.h

@@ -24,7 +24,7 @@
 
 /* First, the 32-bit atomic ops that are "real" on our 64-bit platform. */
 
-#define atomic_set(v, i) ((v)->counter = (i))
+#define atomic_set(v, i) WRITE_ONCE((v)->counter, (i))
 
 /*
  * The smp_mb() operations throughout are to support the fact that
@@ -82,8 +82,8 @@ static inline void atomic_xor(int i, atomic_t *v)
 
 #define ATOMIC64_INIT(i)	{ (i) }
 
-#define atomic64_read(v)		((v)->counter)
-#define atomic64_set(v, i) ((v)->counter = (i))
+#define atomic64_read(v)	READ_ONCE((v)->counter)
+#define atomic64_set(v, i)	WRITE_ONCE((v)->counter, (i))
 
 static inline void atomic64_add(long i, atomic64_t *v)
 {

+ 2 - 2
arch/x86/include/asm/atomic.h

@@ -24,7 +24,7 @@
  */
 static __always_inline int atomic_read(const atomic_t *v)
 {
-	return ACCESS_ONCE((v)->counter);
+	return READ_ONCE((v)->counter);
 }
 
 /**
@@ -36,7 +36,7 @@ static __always_inline int atomic_read(const atomic_t *v)
  */
 static __always_inline void atomic_set(atomic_t *v, int i)
 {
-	v->counter = i;
+	WRITE_ONCE(v->counter, i);
 }
 
 /**

+ 2 - 2
arch/x86/include/asm/atomic64_64.h

@@ -18,7 +18,7 @@
  */
 static inline long atomic64_read(const atomic64_t *v)
 {
-	return ACCESS_ONCE((v)->counter);
+	return READ_ONCE((v)->counter);
 }
 
 /**
@@ -30,7 +30,7 @@ static inline long atomic64_read(const atomic64_t *v)
  */
 static inline void atomic64_set(atomic64_t *v, long i)
 {
-	v->counter = i;
+	WRITE_ONCE(v->counter, i);
 }
 
 /**

+ 2 - 2
arch/xtensa/include/asm/atomic.h

@@ -47,7 +47,7 @@
  *
  * Atomically reads the value of @v.
  */
-#define atomic_read(v)		ACCESS_ONCE((v)->counter)
+#define atomic_read(v)		READ_ONCE((v)->counter)
 
 /**
  * atomic_set - set atomic variable
@@ -56,7 +56,7 @@
  *
  * Atomically sets the value of @v to @i.
  */
-#define atomic_set(v,i)		((v)->counter = (i))
+#define atomic_set(v,i)		WRITE_ONCE((v)->counter, (i))
 
 #if XCHAL_HAVE_S32C1I
 #define ATOMIC_OP(op)							\

+ 1 - 1
drivers/net/ethernet/sfc/mcdi.c

@@ -9,7 +9,7 @@
 
 #include <linux/delay.h>
 #include <linux/moduleparam.h>
-#include <asm/cmpxchg.h>
+#include <linux/atomic.h>
 #include "net_driver.h"
 #include "nic.h"
 #include "io.h"

+ 1 - 2
drivers/phy/phy-rcar-gen2.c

@@ -17,8 +17,7 @@
 #include <linux/phy/phy.h>
 #include <linux/platform_device.h>
 #include <linux/spinlock.h>
-
-#include <asm/cmpxchg.h>
+#include <linux/atomic.h>
 
 #define USBHS_LPSTS			0x02
 #define USBHS_UGCTRL			0x80

+ 1 - 1
drivers/staging/speakup/selection.c

@@ -7,7 +7,7 @@
 #include <linux/workqueue.h>
 #include <linux/tty.h>
 #include <linux/tty_flip.h>
-#include <asm/cmpxchg.h>
+#include <linux/atomic.h>
 
 #include "speakup.h"
 

+ 33 - 23
include/asm-generic/atomic-long.h

@@ -35,7 +35,7 @@ typedef atomic_t atomic_long_t;
 #endif
 
 #define ATOMIC_LONG_READ_OP(mo)						\
-static inline long atomic_long_read##mo(atomic_long_t *l)		\
+static inline long atomic_long_read##mo(const atomic_long_t *l)		\
 {									\
 	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;		\
 									\
@@ -43,6 +43,7 @@ static inline long atomic_long_read##mo(atomic_long_t *l)		\
 }
 ATOMIC_LONG_READ_OP()
 ATOMIC_LONG_READ_OP(_acquire)
+ATOMIC_LONG_READ_OP(_ctrl)
 
 #undef ATOMIC_LONG_READ_OP
 
@@ -112,19 +113,23 @@ static inline void atomic_long_dec(atomic_long_t *l)
 	ATOMIC_LONG_PFX(_dec)(v);
 }
 
-static inline void atomic_long_add(long i, atomic_long_t *l)
-{
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
-
-	ATOMIC_LONG_PFX(_add)(i, v);
+#define ATOMIC_LONG_OP(op)						\
+static inline void							\
+atomic_long_##op(long i, atomic_long_t *l)				\
+{									\
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;		\
+									\
+	ATOMIC_LONG_PFX(_##op)(i, v);					\
 }
 
-static inline void atomic_long_sub(long i, atomic_long_t *l)
-{
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
+ATOMIC_LONG_OP(add)
+ATOMIC_LONG_OP(sub)
+ATOMIC_LONG_OP(and)
+ATOMIC_LONG_OP(or)
+ATOMIC_LONG_OP(xor)
+ATOMIC_LONG_OP(andnot)
 
-	ATOMIC_LONG_PFX(_sub)(i, v);
-}
+#undef ATOMIC_LONG_OP
 
 static inline int atomic_long_sub_and_test(long i, atomic_long_t *l)
 {
@@ -154,19 +159,24 @@ static inline int atomic_long_add_negative(long i, atomic_long_t *l)
 	return ATOMIC_LONG_PFX(_add_negative)(i, v);
 }
 
-static inline long atomic_long_inc_return(atomic_long_t *l)
-{
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
-
-	return (long)ATOMIC_LONG_PFX(_inc_return)(v);
-}
-
-static inline long atomic_long_dec_return(atomic_long_t *l)
-{
-	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;
-
-	return (long)ATOMIC_LONG_PFX(_dec_return)(v);
+#define ATOMIC_LONG_INC_DEC_OP(op, mo)					\
+static inline long							\
+atomic_long_##op##_return##mo(atomic_long_t *l)				\
+{									\
+	ATOMIC_LONG_PFX(_t) *v = (ATOMIC_LONG_PFX(_t) *)l;		\
+									\
+	return (long)ATOMIC_LONG_PFX(_##op##_return##mo)(v);		\
 }
+ATOMIC_LONG_INC_DEC_OP(inc,)
+ATOMIC_LONG_INC_DEC_OP(inc, _relaxed)
+ATOMIC_LONG_INC_DEC_OP(inc, _acquire)
+ATOMIC_LONG_INC_DEC_OP(inc, _release)
+ATOMIC_LONG_INC_DEC_OP(dec,)
+ATOMIC_LONG_INC_DEC_OP(dec, _relaxed)
+ATOMIC_LONG_INC_DEC_OP(dec, _acquire)
+ATOMIC_LONG_INC_DEC_OP(dec, _release)
+
+#undef ATOMIC_LONG_INC_DEC_OP
 
 static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
 {

+ 2 - 2
include/asm-generic/atomic.h

@@ -127,7 +127,7 @@ ATOMIC_OP(xor, ^)
  * Atomically reads the value of @v.
  */
 #ifndef atomic_read
-#define atomic_read(v)	ACCESS_ONCE((v)->counter)
+#define atomic_read(v)	READ_ONCE((v)->counter)
 #endif
 
 /**
@@ -137,7 +137,7 @@ ATOMIC_OP(xor, ^)
  *
  * Atomically sets the value of @v to @i.
  */
-#define atomic_set(v, i) (((v)->counter) = (i))
+#define atomic_set(v, i) WRITE_ONCE(((v)->counter), (i))
 
 #include <linux/irqflags.h>
 

+ 4 - 4
include/asm-generic/mutex-dec.h

@@ -20,7 +20,7 @@
 static inline void
 __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
-	if (unlikely(atomic_dec_return(count) < 0))
+	if (unlikely(atomic_dec_return_acquire(count) < 0))
 		fail_fn(count);
 }
 
@@ -35,7 +35,7 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
 static inline int
 __mutex_fastpath_lock_retval(atomic_t *count)
 {
-	if (unlikely(atomic_dec_return(count) < 0))
+	if (unlikely(atomic_dec_return_acquire(count) < 0))
 		return -1;
 	return 0;
 }
@@ -56,7 +56,7 @@ __mutex_fastpath_lock_retval(atomic_t *count)
 static inline void
 __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
-	if (unlikely(atomic_inc_return(count) <= 0))
+	if (unlikely(atomic_inc_return_release(count) <= 0))
 		fail_fn(count);
 }
 
@@ -80,7 +80,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 static inline int
 __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
-	if (likely(atomic_cmpxchg(count, 1, 0) == 1))
+	if (likely(atomic_cmpxchg_acquire(count, 1, 0) == 1))
 		return 1;
 	return 0;
 }

+ 5 - 5
include/asm-generic/mutex-xchg.h

@@ -31,7 +31,7 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
 		 * to ensure that any waiting tasks are woken up by the
 		 * unlock slow path.
 		 */
-		if (likely(atomic_xchg(count, -1) != 1))
+		if (likely(atomic_xchg_acquire(count, -1) != 1))
 			fail_fn(count);
 }
 
@@ -46,7 +46,7 @@ __mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
 static inline int
 __mutex_fastpath_lock_retval(atomic_t *count)
 {
-	if (unlikely(atomic_xchg(count, 0) != 1))
+	if (unlikely(atomic_xchg_acquire(count, 0) != 1))
 		if (likely(atomic_xchg(count, -1) != 1))
 			return -1;
 	return 0;
@@ -67,7 +67,7 @@ __mutex_fastpath_lock_retval(atomic_t *count)
 static inline void
 __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 {
-	if (unlikely(atomic_xchg(count, 1) != 0))
+	if (unlikely(atomic_xchg_release(count, 1) != 0))
 		fail_fn(count);
 }
 
@@ -91,7 +91,7 @@ __mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
 static inline int
 __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
 {
-	int prev = atomic_xchg(count, 0);
+	int prev = atomic_xchg_acquire(count, 0);
 
 	if (unlikely(prev < 0)) {
 		/*
@@ -105,7 +105,7 @@ __mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
 		 *   owner's unlock path needlessly, but that's not a problem
 		 *   in practice. ]
 		 */
-		prev = atomic_xchg(count, prev);
+		prev = atomic_xchg_acquire(count, prev);
 		if (prev < 0)
 			prev = 0;
 	}

+ 2 - 2
include/asm-generic/qrwlock_types.h

@@ -10,12 +10,12 @@
 
 typedef struct qrwlock {
 	atomic_t		cnts;
-	arch_spinlock_t		lock;
+	arch_spinlock_t		wait_lock;
 } arch_rwlock_t;
 
 #define	__ARCH_RW_LOCK_UNLOCKED {		\
 	.cnts = ATOMIC_INIT(0),			\
-	.lock = __ARCH_SPIN_LOCK_UNLOCKED,	\
+	.wait_lock = __ARCH_SPIN_LOCK_UNLOCKED,	\
 }
 
 #endif /* __ASM_GENERIC_QRWLOCK_TYPES_H */

+ 14 - 7
include/asm-generic/rwsem.h

@@ -33,7 +33,7 @@
  */
 static inline void __down_read(struct rw_semaphore *sem)
 {
-	if (unlikely(atomic_long_inc_return((atomic_long_t *)&sem->count) <= 0))
+	if (unlikely(atomic_long_inc_return_acquire((atomic_long_t *)&sem->count) <= 0))
 		rwsem_down_read_failed(sem);
 }
 
@@ -42,7 +42,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem)
 	long tmp;
 
 	while ((tmp = sem->count) >= 0) {
-		if (tmp == cmpxchg(&sem->count, tmp,
+		if (tmp == cmpxchg_acquire(&sem->count, tmp,
 				   tmp + RWSEM_ACTIVE_READ_BIAS)) {
 			return 1;
 		}
@@ -57,7 +57,7 @@ static inline void __down_write_nested(struct rw_semaphore *sem, int subclass)
 {
 	long tmp;
 
-	tmp = atomic_long_add_return(RWSEM_ACTIVE_WRITE_BIAS,
+	tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
 				     (atomic_long_t *)&sem->count);
 	if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
 		rwsem_down_write_failed(sem);
@@ -72,7 +72,7 @@ static inline int __down_write_trylock(struct rw_semaphore *sem)
 {
 	long tmp;
 
-	tmp = cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
+	tmp = cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE,
 		      RWSEM_ACTIVE_WRITE_BIAS);
 	return tmp == RWSEM_UNLOCKED_VALUE;
 }
@@ -84,7 +84,7 @@ static inline void __up_read(struct rw_semaphore *sem)
 {
 	long tmp;
 
-	tmp = atomic_long_dec_return((atomic_long_t *)&sem->count);
+	tmp = atomic_long_dec_return_release((atomic_long_t *)&sem->count);
 	if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0))
 		rwsem_wake(sem);
 }
@@ -94,7 +94,7 @@ static inline void __up_read(struct rw_semaphore *sem)
  */
 static inline void __up_write(struct rw_semaphore *sem)
 {
-	if (unlikely(atomic_long_sub_return(RWSEM_ACTIVE_WRITE_BIAS,
+	if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS,
 				 (atomic_long_t *)&sem->count) < 0))
 		rwsem_wake(sem);
 }
@@ -114,7 +114,14 @@ static inline void __downgrade_write(struct rw_semaphore *sem)
 {
 	long tmp;
 
-	tmp = atomic_long_add_return(-RWSEM_WAITING_BIAS,
+	/*
+	 * When downgrading from exclusive to shared ownership,
+	 * anything inside the write-locked region cannot leak
+	 * into the read side. In contrast, anything in the
+	 * read-locked region is ok to be re-ordered into the
+	 * write side. As such, rely on RELEASE semantics.
+	 */
+	tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS,
 				     (atomic_long_t *)&sem->count);
 	if (tmp < 0)
 		rwsem_downgrade_wake(sem);

+ 117 - 1
include/linux/atomic.h

@@ -4,6 +4,15 @@
 #include <asm/atomic.h>
 #include <asm/barrier.h>
 
+#ifndef atomic_read_ctrl
+static inline int atomic_read_ctrl(const atomic_t *v)
+{
+	int val = atomic_read(v);
+	smp_read_barrier_depends(); /* Enforce control dependency. */
+	return val;
+}
+#endif
+
 /*
  * Relaxed variants of xchg, cmpxchg and some atomic operations.
  *
@@ -81,6 +90,30 @@
 #endif
 #endif /* atomic_add_return_relaxed */
 
+/* atomic_inc_return_relaxed */
+#ifndef atomic_inc_return_relaxed
+#define  atomic_inc_return_relaxed	atomic_inc_return
+#define  atomic_inc_return_acquire	atomic_inc_return
+#define  atomic_inc_return_release	atomic_inc_return
+
+#else /* atomic_inc_return_relaxed */
+
+#ifndef atomic_inc_return_acquire
+#define  atomic_inc_return_acquire(...)					\
+	__atomic_op_acquire(atomic_inc_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic_inc_return_release
+#define  atomic_inc_return_release(...)					\
+	__atomic_op_release(atomic_inc_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic_inc_return
+#define  atomic_inc_return(...)						\
+	__atomic_op_fence(atomic_inc_return, __VA_ARGS__)
+#endif
+#endif /* atomic_inc_return_relaxed */
+
 /* atomic_sub_return_relaxed */
 #ifndef atomic_sub_return_relaxed
 #define  atomic_sub_return_relaxed	atomic_sub_return
@@ -105,6 +138,30 @@
 #endif
 #endif /* atomic_sub_return_relaxed */
 
+/* atomic_dec_return_relaxed */
+#ifndef atomic_dec_return_relaxed
+#define  atomic_dec_return_relaxed	atomic_dec_return
+#define  atomic_dec_return_acquire	atomic_dec_return
+#define  atomic_dec_return_release	atomic_dec_return
+
+#else /* atomic_dec_return_relaxed */
+
+#ifndef atomic_dec_return_acquire
+#define  atomic_dec_return_acquire(...)					\
+	__atomic_op_acquire(atomic_dec_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic_dec_return_release
+#define  atomic_dec_return_release(...)					\
+	__atomic_op_release(atomic_dec_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic_dec_return
+#define  atomic_dec_return(...)						\
+	__atomic_op_fence(atomic_dec_return, __VA_ARGS__)
+#endif
+#endif /* atomic_dec_return_relaxed */
+
 /* atomic_xchg_relaxed */
 #ifndef atomic_xchg_relaxed
 #define  atomic_xchg_relaxed		atomic_xchg
@@ -185,6 +242,31 @@
 #endif
 #endif /* atomic64_add_return_relaxed */
 
+/* atomic64_inc_return_relaxed */
+#ifndef atomic64_inc_return_relaxed
+#define  atomic64_inc_return_relaxed	atomic64_inc_return
+#define  atomic64_inc_return_acquire	atomic64_inc_return
+#define  atomic64_inc_return_release	atomic64_inc_return
+
+#else /* atomic64_inc_return_relaxed */
+
+#ifndef atomic64_inc_return_acquire
+#define  atomic64_inc_return_acquire(...)				\
+	__atomic_op_acquire(atomic64_inc_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_inc_return_release
+#define  atomic64_inc_return_release(...)				\
+	__atomic_op_release(atomic64_inc_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_inc_return
+#define  atomic64_inc_return(...)					\
+	__atomic_op_fence(atomic64_inc_return, __VA_ARGS__)
+#endif
+#endif /* atomic64_inc_return_relaxed */
+
+
 /* atomic64_sub_return_relaxed */
 #ifndef atomic64_sub_return_relaxed
 #define  atomic64_sub_return_relaxed	atomic64_sub_return
@@ -209,6 +291,30 @@
 #endif
 #endif /* atomic64_sub_return_relaxed */
 
+/* atomic64_dec_return_relaxed */
+#ifndef atomic64_dec_return_relaxed
+#define  atomic64_dec_return_relaxed	atomic64_dec_return
+#define  atomic64_dec_return_acquire	atomic64_dec_return
+#define  atomic64_dec_return_release	atomic64_dec_return
+
+#else /* atomic64_dec_return_relaxed */
+
+#ifndef atomic64_dec_return_acquire
+#define  atomic64_dec_return_acquire(...)				\
+	__atomic_op_acquire(atomic64_dec_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_dec_return_release
+#define  atomic64_dec_return_release(...)				\
+	__atomic_op_release(atomic64_dec_return, __VA_ARGS__)
+#endif
+
+#ifndef atomic64_dec_return
+#define  atomic64_dec_return(...)					\
+	__atomic_op_fence(atomic64_dec_return, __VA_ARGS__)
+#endif
+#endif /* atomic64_dec_return_relaxed */
+
 /* atomic64_xchg_relaxed */
 #ifndef atomic64_xchg_relaxed
 #define  atomic64_xchg_relaxed		atomic64_xchg
@@ -451,11 +557,19 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 }
 #endif
 
-#include <asm-generic/atomic-long.h>
 #ifdef CONFIG_GENERIC_ATOMIC64
 #include <asm-generic/atomic64.h>
 #endif
 
+#ifndef atomic64_read_ctrl
+static inline long long atomic64_read_ctrl(const atomic64_t *v)
+{
+	long long val = atomic64_read(v);
+	smp_read_barrier_depends(); /* Enforce control dependency. */
+	return val;
+}
+#endif
+
 #ifndef atomic64_andnot
 static inline void atomic64_andnot(long long i, atomic64_t *v)
 {
@@ -463,4 +577,6 @@ static inline void atomic64_andnot(long long i, atomic64_t *v)
 }
 #endif
 
+#include <asm-generic/atomic-long.h>
+
 #endif /* _LINUX_ATOMIC_H */

+ 11 - 2
kernel/futex.c

@@ -255,9 +255,18 @@ struct futex_hash_bucket {
 	struct plist_head chain;
 } ____cacheline_aligned_in_smp;
 
-static unsigned long __read_mostly futex_hashsize;
+/*
+ * The base of the bucket array and its size are always used together
+ * (after initialization only in hash_futex()), so ensure that they
+ * reside in the same cacheline.
+ */
+static struct {
+	struct futex_hash_bucket *queues;
+	unsigned long            hashsize;
+} __futex_data __read_mostly __aligned(2*sizeof(long));
+#define futex_queues   (__futex_data.queues)
+#define futex_hashsize (__futex_data.hashsize)
 
-static struct futex_hash_bucket *futex_queues;
 
 /*
  * Fault injections for futexes.

+ 2 - 2
kernel/locking/mcs_spinlock.h

@@ -67,7 +67,7 @@ void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 	node->locked = 0;
 	node->next   = NULL;
 
-	prev = xchg(lock, node);
+	prev = xchg_acquire(lock, node);
 	if (likely(prev == NULL)) {
 		/*
 		 * Lock acquired, don't need to set node->locked to 1. Threads
@@ -98,7 +98,7 @@ void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node)
 		/*
 		 * Release the lock by setting it to NULL
 		 */
-		if (likely(cmpxchg(lock, node, NULL) == node))
+		if (likely(cmpxchg_release(lock, node, NULL) == node))
 			return;
 		/* Wait until the next pointer is set */
 		while (!(next = READ_ONCE(node->next)))

+ 5 - 4
kernel/locking/mutex.c

@@ -277,7 +277,7 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock)
 static inline bool mutex_try_to_acquire(struct mutex *lock)
 {
 	return !mutex_is_locked(lock) &&
-		(atomic_cmpxchg(&lock->count, 1, 0) == 1);
+		(atomic_cmpxchg_acquire(&lock->count, 1, 0) == 1);
 }
 
 /*
@@ -529,7 +529,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 	 * Once more, try to acquire the lock. Only try-lock the mutex if
 	 * it is unlocked to reduce unnecessary xchg() operations.
 	 */
-	if (!mutex_is_locked(lock) && (atomic_xchg(&lock->count, 0) == 1))
+	if (!mutex_is_locked(lock) &&
+	    (atomic_xchg_acquire(&lock->count, 0) == 1))
 		goto skip_wait;
 
 	debug_mutex_lock_common(lock, &waiter);
@@ -553,7 +554,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 		 * non-negative in order to avoid unnecessary xchg operations:
 		 */
 		if (atomic_read(&lock->count) >= 0 &&
-		    (atomic_xchg(&lock->count, -1) == 1))
+		    (atomic_xchg_acquire(&lock->count, -1) == 1))
 			break;
 
 		/*
@@ -867,7 +868,7 @@ static inline int __mutex_trylock_slowpath(atomic_t *lock_count)
 
 	spin_lock_mutex(&lock->wait_lock, flags);
 
-	prev = atomic_xchg(&lock->count, -1);
+	prev = atomic_xchg_acquire(&lock->count, -1);
 	if (likely(prev == 1)) {
 		mutex_set_owner(lock);
 		mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);

+ 8 - 3
kernel/locking/osq_lock.c

@@ -50,7 +50,7 @@ osq_wait_next(struct optimistic_spin_queue *lock,
 
 	for (;;) {
 		if (atomic_read(&lock->tail) == curr &&
-		    atomic_cmpxchg(&lock->tail, curr, old) == curr) {
+		    atomic_cmpxchg_acquire(&lock->tail, curr, old) == curr) {
 			/*
 			 * We were the last queued, we moved @lock back. @prev
 			 * will now observe @lock and will complete its
@@ -92,7 +92,11 @@ bool osq_lock(struct optimistic_spin_queue *lock)
 	node->next = NULL;
 	node->cpu = curr;
 
-	old = atomic_xchg(&lock->tail, curr);
+	/*
+	 * ACQUIRE semantics, pairs with corresponding RELEASE
+	 * in unlock() uncontended, or fastpath.
+	 */
+	old = atomic_xchg_acquire(&lock->tail, curr);
 	if (old == OSQ_UNLOCKED_VAL)
 		return true;
 
@@ -184,7 +188,8 @@ void osq_unlock(struct optimistic_spin_queue *lock)
 	/*
 	 * Fast path for the uncontended case.
 	 */
-	if (likely(atomic_cmpxchg(&lock->tail, curr, OSQ_UNLOCKED_VAL) == curr))
+	if (likely(atomic_cmpxchg_release(&lock->tail, curr,
+					  OSQ_UNLOCKED_VAL) == curr))
 		return;
 
 	/*

+ 4 - 4
kernel/locking/qrwlock.c

@@ -86,7 +86,7 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
 	/*
 	 * Put the reader into the wait queue
 	 */
-	arch_spin_lock(&lock->lock);
+	arch_spin_lock(&lock->wait_lock);
 
 	/*
 	 * The ACQUIRE semantics of the following spinning code ensure
@@ -99,7 +99,7 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
 	/*
 	 * Signal the next one in queue to become queue head
 	 */
-	arch_spin_unlock(&lock->lock);
+	arch_spin_unlock(&lock->wait_lock);
 }
 EXPORT_SYMBOL(queued_read_lock_slowpath);
 
@@ -112,7 +112,7 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
 	u32 cnts;
 
 	/* Put the writer into the wait queue */
-	arch_spin_lock(&lock->lock);
+	arch_spin_lock(&lock->wait_lock);
 
 	/* Try to acquire the lock directly if no reader is present */
 	if (!atomic_read(&lock->cnts) &&
@@ -144,6 +144,6 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
 		cpu_relax_lowlatency();
 	}
 unlock:
-	arch_spin_unlock(&lock->lock);
+	arch_spin_unlock(&lock->wait_lock);
 }
 EXPORT_SYMBOL(queued_write_lock_slowpath);

+ 1 - 5
kernel/locking/qspinlock_paravirt.h

@@ -267,7 +267,6 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
 		}
 
 		if (!lp) { /* ONCE */
-			WRITE_ONCE(pn->state, vcpu_hashed);
 			lp = pv_hash(lock, pn);
 
 			/*
@@ -275,11 +274,9 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
 			 * when we observe _Q_SLOW_VAL in __pv_queued_spin_unlock()
 			 * we'll be sure to be able to observe our hash entry.
 			 *
-			 *   [S] pn->state
 			 *   [S] <hash>                 [Rmw] l->locked == _Q_SLOW_VAL
 			 *       MB                           RMB
 			 * [RmW] l->locked = _Q_SLOW_VAL  [L] <unhash>
-			 *                                [L] pn->state
 			 *
 			 * Matches the smp_rmb() in __pv_queued_spin_unlock().
 			 */
@@ -364,8 +361,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
 	 * vCPU is harmless other than the additional latency in completing
 	 * the unlock.
 	 */
-	if (READ_ONCE(node->state) == vcpu_hashed)
-		pv_kick(node->cpu);
+	pv_kick(node->cpu);
 }
 /*
  * Include the architecture specific callee-save thunk of the

+ 21 - 9
kernel/locking/rtmutex.c

@@ -74,14 +74,23 @@ static void fixup_rt_mutex_waiters(struct rt_mutex *lock)
  * set up.
  */
 #ifndef CONFIG_DEBUG_RT_MUTEXES
-# define rt_mutex_cmpxchg(l,c,n)	(cmpxchg(&l->owner, c, n) == c)
+# define rt_mutex_cmpxchg_relaxed(l,c,n) (cmpxchg_relaxed(&l->owner, c, n) == c)
+# define rt_mutex_cmpxchg_acquire(l,c,n) (cmpxchg_acquire(&l->owner, c, n) == c)
+# define rt_mutex_cmpxchg_release(l,c,n) (cmpxchg_release(&l->owner, c, n) == c)
+
+/*
+ * Callers must hold the ->wait_lock -- which is the whole purpose as we force
+ * all future threads that attempt to [Rmw] the lock to the slowpath. As such
+ * relaxed semantics suffice.
+ */
 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
 {
 	unsigned long owner, *p = (unsigned long *) &lock->owner;
 
 	do {
 		owner = *p;
-	} while (cmpxchg(p, owner, owner | RT_MUTEX_HAS_WAITERS) != owner);
+	} while (cmpxchg_relaxed(p, owner,
+				 owner | RT_MUTEX_HAS_WAITERS) != owner);
 }
 
 /*
@@ -121,11 +130,14 @@ static inline bool unlock_rt_mutex_safe(struct rt_mutex *lock)
 	 *					lock(wait_lock);
 	 *					acquire(lock);
 	 */
-	return rt_mutex_cmpxchg(lock, owner, NULL);
+	return rt_mutex_cmpxchg_release(lock, owner, NULL);
 }
 
 #else
-# define rt_mutex_cmpxchg(l,c,n)	(0)
+# define rt_mutex_cmpxchg_relaxed(l,c,n)	(0)
+# define rt_mutex_cmpxchg_acquire(l,c,n)	(0)
+# define rt_mutex_cmpxchg_release(l,c,n)	(0)
+
 static inline void mark_rt_mutex_waiters(struct rt_mutex *lock)
 {
 	lock->owner = (struct task_struct *)
@@ -1321,7 +1333,7 @@ rt_mutex_fastlock(struct rt_mutex *lock, int state,
 				struct hrtimer_sleeper *timeout,
 				enum rtmutex_chainwalk chwalk))
 {
-	if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
+	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
 		rt_mutex_deadlock_account_lock(lock, current);
 		return 0;
 	} else
@@ -1337,7 +1349,7 @@ rt_mutex_timed_fastlock(struct rt_mutex *lock, int state,
 				      enum rtmutex_chainwalk chwalk))
 {
 	if (chwalk == RT_MUTEX_MIN_CHAINWALK &&
-	    likely(rt_mutex_cmpxchg(lock, NULL, current))) {
+	    likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
 		rt_mutex_deadlock_account_lock(lock, current);
 		return 0;
 	} else
@@ -1348,7 +1360,7 @@ static inline int
 rt_mutex_fasttrylock(struct rt_mutex *lock,
 		     int (*slowfn)(struct rt_mutex *lock))
 {
-	if (likely(rt_mutex_cmpxchg(lock, NULL, current))) {
+	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) {
 		rt_mutex_deadlock_account_lock(lock, current);
 		return 1;
 	}
@@ -1362,7 +1374,7 @@ rt_mutex_fastunlock(struct rt_mutex *lock,
 {
 	WAKE_Q(wake_q);
 
-	if (likely(rt_mutex_cmpxchg(lock, current, NULL))) {
+	if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
 		rt_mutex_deadlock_account_unlock(current);
 
 	} else {
@@ -1484,7 +1496,7 @@ EXPORT_SYMBOL_GPL(rt_mutex_unlock);
 bool __sched rt_mutex_futex_unlock(struct rt_mutex *lock,
 				   struct wake_q_head *wqh)
 {
-	if (likely(rt_mutex_cmpxchg(lock, current, NULL))) {
+	if (likely(rt_mutex_cmpxchg_release(lock, current, NULL))) {
 		rt_mutex_deadlock_account_unlock(current);
 		return false;
 	}

+ 3 - 2
kernel/locking/rwsem-xadd.c

@@ -262,7 +262,7 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
 	 * to reduce unnecessary expensive cmpxchg() operations.
 	 */
 	if (count == RWSEM_WAITING_BIAS &&
-	    cmpxchg(&sem->count, RWSEM_WAITING_BIAS,
+	    cmpxchg_acquire(&sem->count, RWSEM_WAITING_BIAS,
 		    RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
 		if (!list_is_singular(&sem->wait_list))
 			rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
@@ -285,7 +285,8 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
 		if (!(count == 0 || count == RWSEM_WAITING_BIAS))
 			return false;
 
-		old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
+		old = cmpxchg_acquire(&sem->count, count,
+				      count + RWSEM_ACTIVE_WRITE_BIAS);
 		if (old == count) {
 			rwsem_set_owner(sem);
 			return true;