@@ -0,0 +1,473 @@
+/*
+ * Queued spinlock
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * (C) Copyright 2013-2015 Hewlett-Packard Development Company, L.P.
+ * (C) Copyright 2013-2014 Red Hat, Inc.
+ * (C) Copyright 2015 Intel Corp.
+ *
+ * Authors: Waiman Long <waiman.long@hp.com>
+ *          Peter Zijlstra <peterz@infradead.org>
+ */
+
+#ifndef _GEN_PV_LOCK_SLOWPATH
+
+#include <linux/smp.h>
+#include <linux/bug.h>
+#include <linux/cpumask.h>
+#include <linux/percpu.h>
+#include <linux/hardirq.h>
+#include <linux/mutex.h>
+#include <asm/byteorder.h>
+#include <asm/qspinlock.h>
+
+/*
+ * The basic principle of a queue-based spinlock can best be understood
+ * by studying a classic queue-based spinlock implementation called the
+ * MCS lock. The paper below provides a good description for this kind
+ * of lock.
+ *
+ * http://www.cise.ufl.edu/tr/DOC/REP-1992-71.pdf
+ *
+ * This queued spinlock implementation is based on the MCS lock; however, to
+ * make it fit the 4 bytes we assume spinlock_t to be, and to preserve its
+ * existing API, we must modify it as follows.
+ *
+ * In particular; where the traditional MCS lock consists of a tail pointer
+ * (8 bytes) and needs the next pointer (another 8 bytes) of its own node to
+ * unlock the next pending (next->locked), we compress both these: {tail,
+ * next->locked} into a single u32 value.
+ *
+ * A spinlock disables recursion in its own context, and there is a limit to
+ * the contexts that can nest: task, softirq, hardirq and nmi. As there are
+ * at most 4 nesting levels, the nesting level can be encoded in a 2-bit
+ * number. We can then encode the tail by combining this 2-bit nesting level
+ * with the cpu number. With one byte for the lock value and 3 bytes for the
+ * tail, only a 32-bit word is needed. Even though we only need 1 bit for the
+ * lock, we extend it to a full byte to achieve better performance on
+ * architectures that support atomic byte writes.
+ *
+ * We also change the first spinner to spin on the lock bit instead of its
+ * node; thereby avoiding the need to carry a node from lock to unlock, and
+ * preserving the existing lock API. This also makes the unlock code simpler
+ * and faster.
+ *
+ * N.B. The current implementation only supports architectures that allow
+ * atomic operations on smaller 8-bit and 16-bit data types.
+ */
+
+#include "mcs_spinlock.h"
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+#define MAX_NODES	8
+#else
+#define MAX_NODES	4
+#endif
+
+/*
+ * Per-CPU queue node structures; we can never have more than 4 nested
+ * contexts: task, softirq, hardirq, nmi.
+ *
+ * Exactly fits one 64-byte cacheline on a 64-bit architecture.
+ *
+ * PV doubles the storage and uses the second cacheline for PV state.
+ */
+static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
+
+/*
+ * We must be able to distinguish between no-tail and the tail at 0:0,
+ * therefore increment the cpu number by one.
+ */
+
+static inline u32 encode_tail(int cpu, int idx)
+{
+	u32 tail;
+
+#ifdef CONFIG_DEBUG_SPINLOCK
+	BUG_ON(idx > 3);
+#endif
+	tail  = (cpu + 1) << _Q_TAIL_CPU_OFFSET;
+	tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */
+
+	return tail;
+}
+
+static inline struct mcs_spinlock *decode_tail(u32 tail)
+{
+	int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
+	int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
+
+	return per_cpu_ptr(&mcs_nodes[idx], cpu);
+}
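+
+/*
+ * As an illustration (not part of the algorithm): assuming the default
+ * layout from asm-generic/qspinlock_types.h for NR_CPUS < 16K, where
+ * _Q_TAIL_IDX_OFFSET == 16 and _Q_TAIL_CPU_OFFSET == 18, a waiter on
+ * CPU 5 queuing at nesting level 1 (e.g. a softirq that interrupted a
+ * task which had already queued) gets:
+ *
+ *	encode_tail(5, 1) == (5 + 1) << 18 | 1 << 16 == 0x00190000
+ *
+ * and decode_tail(0x00190000) recovers cpu == 5, idx == 1, i.e. the
+ * address of mcs_nodes[1] on CPU 5.
+ */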
+
+#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
+
+/*
+ * By using the whole 2nd least significant byte for the pending bit, we
+ * can allow better optimization of the lock acquisition for the pending
+ * bit holder.
+ *
+ * This internal structure is also used by the set_locked function which
+ * is not restricted to _Q_PENDING_BITS == 8.
+ */
+struct __qspinlock {
+	union {
+		atomic_t val;
+#ifdef __LITTLE_ENDIAN
+		struct {
+			u8	locked;
+			u8	pending;
+		};
+		struct {
+			u16	locked_pending;
+			u16	tail;
+		};
+#else
+		struct {
+			u16	tail;
+			u16	locked_pending;
+		};
+		struct {
+			u8	reserved[2];
+			u8	pending;
+			u8	locked;
+		};
+#endif
+	};
+};
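+
+/*
+ * As an illustration (not part of the algorithm): on a little-endian
+ * machine with the NR_CPUS < 16K layout, a lock word of
+ *
+ *	val == 0x00050101
+ *
+ * is seen through this union as locked == 0x01, pending == 0x01,
+ * locked_pending == 0x0101 and tail == 0x0005, i.e. a held lock with a
+ * pending waiter and a queued tail encoding {cpu 0, idx 1}.
+ */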
+
+#if _Q_PENDING_BITS == 8
+/**
+ * clear_pending_set_locked - take ownership and clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,0 -> *,0,1
+ *
+ * Lock stealing is not allowed if this function is used.
+ */
+static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
+{
+	struct __qspinlock *l = (void *)lock;
+
+	WRITE_ONCE(l->locked_pending, _Q_LOCKED_VAL);
+}
+
+/**
+ * xchg_tail - Put in the new queue tail code word & retrieve previous one
+ * @lock : Pointer to queued spinlock structure
+ * @tail : The new queue tail code word
+ * Return: The previous queue tail code word
+ *
+ * xchg(lock, tail)
+ *
+ * p,*,* -> n,*,* ; prev = xchg(lock, node)
+ */
+static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
+{
+	struct __qspinlock *l = (void *)lock;
+
+	return (u32)xchg(&l->tail, tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
+}
+
+#else /* _Q_PENDING_BITS == 8 */
+
+/**
+ * clear_pending_set_locked - take ownership and clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,0 -> *,0,1
+ */
+static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
+{
+	atomic_add(-_Q_PENDING_VAL + _Q_LOCKED_VAL, &lock->val);
+}
+
+/**
+ * xchg_tail - Put in the new queue tail code word & retrieve previous one
+ * @lock : Pointer to queued spinlock structure
+ * @tail : The new queue tail code word
+ * Return: The previous queue tail code word
+ *
+ * xchg(lock, tail)
+ *
+ * p,*,* -> n,*,* ; prev = xchg(lock, node)
+ */
+static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
+{
+	u32 old, new, val = atomic_read(&lock->val);
+
+	for (;;) {
+		new = (val & _Q_LOCKED_PENDING_MASK) | tail;
+		old = atomic_cmpxchg(&lock->val, val, new);
+		if (old == val)
+			break;
+
+		val = old;
+	}
+	return old;
+}
+#endif /* _Q_PENDING_BITS == 8 */
+
+/**
+ * set_locked - Set the lock bit and own the lock
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,*,0 -> *,0,1
+ */
+static __always_inline void set_locked(struct qspinlock *lock)
+{
+	struct __qspinlock *l = (void *)lock;
+
+	WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
+}
+
+/*
+ * Generate the native code for queued_spin_lock_slowpath(); provide NOPs for
+ * all the PV callbacks.
+ */
+
+static __always_inline void __pv_init_node(struct mcs_spinlock *node) { }
+static __always_inline void __pv_wait_node(struct mcs_spinlock *node) { }
+static __always_inline void __pv_kick_node(struct mcs_spinlock *node) { }
+
+static __always_inline void __pv_wait_head(struct qspinlock *lock,
+					   struct mcs_spinlock *node) { }
+
+#define pv_enabled()		false
+
+#define pv_init_node		__pv_init_node
+#define pv_wait_node		__pv_wait_node
+#define pv_kick_node		__pv_kick_node
+#define pv_wait_head		__pv_wait_head
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+#define queued_spin_lock_slowpath	native_queued_spin_lock_slowpath
+#endif
+
+#endif /* _GEN_PV_LOCK_SLOWPATH */
+
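+/*
+ * For context (a rough sketch, not part of this file): the generic fast
+ * path in include/asm-generic/qspinlock.h does, approximately,
+ *
+ *	val = atomic_cmpxchg(&lock->val, 0, _Q_LOCKED_VAL);
+ *	if (likely(val == 0))
+ *		return;
+ *	queued_spin_lock_slowpath(lock, val);
+ *
+ * so the slowpath below only runs once that cmpxchg has observed a
+ * non-zero lock word, which is then passed in as @val.
+ */
+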
+/**
+ * queued_spin_lock_slowpath - acquire the queued spinlock
+ * @lock: Pointer to queued spinlock structure
+ * @val: Current value of the queued spinlock 32-bit word
+ *
+ * (queue tail, pending bit, lock value)
+ *
+ *              fast     :    slow                                  :    unlock
+ *                       :                                          :
+ * uncontended  (0,0,0) -:--> (0,0,1) ------------------------------:--> (*,*,0)
+ *                       :       | ^--------.------.             /  :
+ *                       :       v           \      \            |  :
+ * pending               :    (0,1,1) +--> (0,1,0)   \           |  :
+ *                       :       | ^--'              |           |  :
+ *                       :       v                   |           |  :
+ * uncontended           :    (n,x,y) +--> (n,0,0) --'            |  :
+ *   queue               :       | ^--'                           |  :
+ *                       :       v                                |  :
+ * contended             :    (*,x,y) +--> (*,0,0) ---> (*,0,1) -'  :
+ *   queue               :         ^--'                             :
+ */
+void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+{
+	struct mcs_spinlock *prev, *next, *node;
+	u32 new, old, tail;
+	int idx;
+
+	BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
+
+	if (pv_enabled())
+		goto queue;
+
+	if (virt_queued_spin_lock(lock))
+		return;
+
+	/*
+	 * wait for in-progress pending->locked hand-overs
+	 *
+	 * 0,1,0 -> 0,0,1
+	 */
+	if (val == _Q_PENDING_VAL) {
+		while ((val = atomic_read(&lock->val)) == _Q_PENDING_VAL)
+			cpu_relax();
+	}
+
+	/*
+	 * trylock || pending
+	 *
+	 * 0,0,0 -> 0,0,1 ; trylock
+	 * 0,0,1 -> 0,1,1 ; pending
+	 */
+	for (;;) {
+		/*
+		 * If we observe any contention; queue.
+		 */
+		if (val & ~_Q_LOCKED_MASK)
+			goto queue;
+
+		new = _Q_LOCKED_VAL;
+		if (val == new)
+			new |= _Q_PENDING_VAL;
+
+		old = atomic_cmpxchg(&lock->val, val, new);
+		if (old == val)
+			break;
+
+		val = old;
+	}
+
+	/*
+	 * we won the trylock
+	 */
+	if (new == _Q_LOCKED_VAL)
+		return;
+
+	/*
+	 * we're pending, wait for the owner to go away.
+	 *
+	 * *,1,1 -> *,1,0
+	 *
+	 * this wait loop must be a load-acquire such that we match the
+	 * store-release that clears the locked bit and create lock
+	 * sequentiality; this is because not all clear_pending_set_locked()
+	 * implementations imply full barriers.
+	 */
+	while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_MASK)
+		cpu_relax();
+
+	/*
+	 * take ownership and clear the pending bit.
+	 *
+	 * *,1,0 -> *,0,1
+	 */
+	clear_pending_set_locked(lock);
+	return;
+
+	/*
+	 * End of pending bit optimistic spinning and beginning of MCS
+	 * queuing.
+	 */
+queue:
+	node = this_cpu_ptr(&mcs_nodes[0]);
+	idx = node->count++;
+	tail = encode_tail(smp_processor_id(), idx);
+
+	node += idx;
+	node->locked = 0;
+	node->next = NULL;
+	pv_init_node(node);
+
+	/*
+	 * We touched a (possibly) cold cacheline in the per-cpu queue node;
+	 * attempt the trylock once more in the hope someone let go while we
+	 * weren't watching.
+	 */
+	if (queued_spin_trylock(lock))
+		goto release;
+
+	/*
+	 * We have already touched the queueing cacheline; don't bother with
+	 * pending stuff.
+	 *
+	 * p,*,* -> n,*,*
+	 */
+	old = xchg_tail(lock, tail);
+
+	/*
+	 * if there was a previous node; link it and wait until reaching the
+	 * head of the waitqueue.
+	 */
+	if (old & _Q_TAIL_MASK) {
+		prev = decode_tail(old);
+		WRITE_ONCE(prev->next, node);
+
+		pv_wait_node(node);
+		arch_mcs_spin_lock_contended(&node->locked);
+	}
+
+	/*
+	 * we're at the head of the waitqueue, wait for the owner & pending to
+	 * go away.
+	 *
+	 * *,x,y -> *,0,0
+	 *
+	 * this wait loop must use a load-acquire such that we match the
+	 * store-release that clears the locked bit and create lock
+	 * sequentiality; this is because the set_locked() function below
+	 * does not imply a full barrier.
+	 */
+	pv_wait_head(lock, node);
+	while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_PENDING_MASK)
+		cpu_relax();
+
+	/*
+	 * claim the lock:
+	 *
+	 * n,0,0 -> 0,0,1 : lock, uncontended
+	 * *,0,0 -> *,0,1 : lock, contended
+	 *
+	 * If the queue head is the only one in the queue (lock value == tail),
+	 * clear the tail code and grab the lock. Otherwise, we only need
+	 * to grab the lock.
+	 */
+	for (;;) {
+		if (val != tail) {
+			set_locked(lock);
+			break;
+		}
+		old = atomic_cmpxchg(&lock->val, val, _Q_LOCKED_VAL);
+		if (old == val)
+			goto release;	/* No contention */
+
+		val = old;
+	}
+
+	/*
+	 * contended path; wait for next, release.
+	 */
+	while (!(next = READ_ONCE(node->next)))
+		cpu_relax();
+
+	arch_mcs_spin_unlock_contended(&next->locked);
+	pv_kick_node(next);
+
+release:
+	/*
+	 * release the node
+	 */
+	this_cpu_dec(mcs_nodes[0].count);
+}
+EXPORT_SYMBOL(queued_spin_lock_slowpath);
+
+/*
+ * Generate the paravirt code for queued_spin_lock_slowpath().
+ */
+#if !defined(_GEN_PV_LOCK_SLOWPATH) && defined(CONFIG_PARAVIRT_SPINLOCKS)
+#define _GEN_PV_LOCK_SLOWPATH
+
+#undef pv_enabled
+#define pv_enabled()	true
+
+#undef pv_init_node
+#undef pv_wait_node
+#undef pv_kick_node
+#undef pv_wait_head
+
+#undef queued_spin_lock_slowpath
+#define queued_spin_lock_slowpath	__pv_queued_spin_lock_slowpath
+
+#include "qspinlock_paravirt.h"
+#include "qspinlock.c"
+
+#endif