@@ -22,6 +22,20 @@
 #define _Q_SLOW_VAL	(3U << _Q_LOCKED_OFFSET)

+/*
+ * Queue Node Adaptive Spinning
+ *
+ * A queue node vCPU will stop spinning if the vCPU in the previous node is
+ * not running. The one lock stealing attempt allowed at slowpath entry
+ * mitigates the slight slowdown for non-overcommitted guests with this
+ * aggressive wait-early mechanism.
+ *
+ * The status of the previous node will be checked at a fixed interval
+ * controlled by PV_PREV_CHECK_MASK. This is to ensure that we won't
+ * pound on the cacheline of the previous node too heavily.
+ */
+#define PV_PREV_CHECK_MASK	0xff
+
 /*
  * Queue node uses: vcpu_running & vcpu_halted.
  * Queue head uses: vcpu_running & vcpu_hashed.
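For context, the single stealing attempt mentioned above comes from the generic slowpath in qspinlock.c, which retries a trylock right before queueing; the PV build redefines that trylock in the next hunk. A heavily abridged sketch of how the pieces meet (names taken from qspinlock.c, not from this patch):

	/* Abridged from queued_spin_lock_slowpath() in qspinlock.c. */
	node = this_cpu_ptr(&mcs_nodes[0]);
	/* ... node setup elided ... */

	/*
	 * With queued_spin_trylock() redefined to pv_queued_spin_steal_lock(),
	 * this is the one steal attempt a waiter gets before it commits
	 * itself to the MCS queue.
	 */
	if (queued_spin_trylock(lock))
		goto release;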
@@ -40,6 +54,94 @@ struct pv_node {
 	u8			state;
 };

+/*
+ * By replacing the regular queued_spin_trylock() with the function below,
+ * it will be called once when a lock waiter enters the PV slowpath before
+ * being queued. By allowing one lock stealing attempt here when the pending
+ * bit is off, it helps to reduce the performance impact of lock waiter
+ * preemption without the drawback of lock starvation.
+ */
+#define queued_spin_trylock(l)	pv_queued_spin_steal_lock(l)
+static inline bool pv_queued_spin_steal_lock(struct qspinlock *lock)
+{
+	struct __qspinlock *l = (void *)lock;
+
+	return !(atomic_read(&lock->val) & _Q_LOCKED_PENDING_MASK) &&
+	       (cmpxchg(&l->locked, 0, _Q_LOCKED_VAL) == 0);
+}
+
+/*
+ * The pending bit is used by the queue head vCPU to indicate that it
+ * is actively spinning on the lock and no lock stealing is allowed.
+ */
+#if _Q_PENDING_BITS == 8
+static __always_inline void set_pending(struct qspinlock *lock)
+{
+	struct __qspinlock *l = (void *)lock;
+
+	WRITE_ONCE(l->pending, 1);
+}
+
+static __always_inline void clear_pending(struct qspinlock *lock)
+{
+	struct __qspinlock *l = (void *)lock;
+
+	WRITE_ONCE(l->pending, 0);
+}
+
+/*
+ * The pending bit check in pv_queued_spin_steal_lock() isn't a memory
+ * barrier. Therefore, an atomic cmpxchg() is used to acquire the lock
+ * just to be sure that it will get it.
+ */
+static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+{
+	struct __qspinlock *l = (void *)lock;
+
+	return !READ_ONCE(l->locked) &&
+	       (cmpxchg(&l->locked_pending, _Q_PENDING_VAL, _Q_LOCKED_VAL)
+			== _Q_PENDING_VAL);
+}
+#else /* _Q_PENDING_BITS == 8 */
+static __always_inline void set_pending(struct qspinlock *lock)
+{
+	atomic_set_mask(_Q_PENDING_VAL, &lock->val);
+}
+
+static __always_inline void clear_pending(struct qspinlock *lock)
+{
+	atomic_clear_mask(_Q_PENDING_VAL, &lock->val);
+}
+
+static __always_inline int trylock_clear_pending(struct qspinlock *lock)
+{
+	int val = atomic_read(&lock->val);
+
+	for (;;) {
+		int old, new;
+
+		if (val & _Q_LOCKED_MASK)
+			break;
+
+		/*
+		 * Try to clear pending bit & set locked bit
+		 */
+		old = val;
+		new = (val & ~_Q_PENDING_MASK) | _Q_LOCKED_VAL;
+		val = atomic_cmpxchg(&lock->val, old, new);
+
+		if (val == old)
+			return 1;
+	}
+	return 0;
+}
+#endif /* _Q_PENDING_BITS == 8 */
+
+/*
+ * Include queued spinlock statistics code
+ */
+#include "qspinlock_stat.h"
+
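The byte-granular helpers above rely on the qspinlock word layout defined in qspinlock.c. As a reminder (little-endian shown; the struct name and fields are taken from that file, not from this patch), the 32-bit lock value overlays roughly as:

	struct __qspinlock {
		union {
			atomic_t val;			/* whole 32-bit lock word */
			struct {
				u8	locked;		/* bits  0-7 : lock holder byte */
				u8	pending;	/* bits  8-15: pending byte */
			};
			struct {
				u16	locked_pending;	/* bits  0-15 as one halfword */
				u16	tail;		/* bits 16-31: MCS tail encoding */
			};
		};
	};

With _Q_PENDING_BITS == 8 the pending field owns a full byte, so set_pending()/clear_pending() can use plain byte stores and trylock_clear_pending() can cmpxchg() the locked_pending halfword; otherwise the helpers fall back to atomic read-modify-write on the full word, as in the #else branch above.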
 /*
  * Lock and MCS node addresses hash table for fast lookup
  *
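For orientation, the hash table that pv_hash()/pv_unhash() operate on maps a lock to the pv_node of its queue head so the unlocker can find which vCPU to kick. A sketch of the entry type assumed here (it lives earlier in this file, outside the hunks shown):

	struct pv_hash_entry {
		struct qspinlock *lock;		/* NULL while the slot is free */
		struct pv_node	 *node;		/* queue head waiting on @lock */
	};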
@@ -100,10 +202,13 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
 {
 	unsigned long offset, hash = hash_ptr(lock, pv_lock_hash_bits);
 	struct pv_hash_entry *he;
+	int hopcnt = 0;

 	for_each_hash_entry(he, offset, hash) {
+		hopcnt++;
 		if (!cmpxchg(&he->lock, NULL, lock)) {
 			WRITE_ONCE(he->node, node);
+			qstat_hop(hopcnt);
 			return &he->lock;
 		}
 	}
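The qstat_hop() and qstat_inc() calls added above come from the newly included qspinlock_stat.h, which is not part of this diff. When CONFIG_QUEUED_LOCK_STAT is disabled they are expected to reduce to empty inline stubs, which is why counters such as waitcnt can later be described as compiling away. A minimal sketch of that assumed fallback (the enum members listed are only the ones referenced in this patch):

	/* Assumed no-op fallback when CONFIG_QUEUED_LOCK_STAT is not set. */
	enum qlock_stats {
		qstat_pv_kick_unlock,
		qstat_pv_spurious_wakeup,
		qstat_pv_wait_again,
		qstat_pv_wait_early,
		qstat_pv_wait_head,
		qstat_pv_wait_node,
	};

	static inline void qstat_inc(enum qlock_stats stat, bool cond) { }
	static inline void qstat_hop(int hopcnt) { }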
@@ -143,6 +248,20 @@ static struct pv_node *pv_unhash(struct qspinlock *lock)
 	BUG();
 }

+/*
+ * Return true when it is time to check the previous node and that node is
+ * not in a running state.
+ */
+static inline bool
+pv_wait_early(struct pv_node *prev, int loop)
+{
+
+	if ((loop & PV_PREV_CHECK_MASK) != 0)
+		return false;
+
+	return READ_ONCE(prev->state) != vcpu_running;
+}
+
 /*
  * Initialize the PV part of the mcs_spinlock node.
  */
@@ -161,15 +280,23 @@ static void pv_init_node(struct mcs_spinlock *node)
  * pv_kick_node() is used to set _Q_SLOW_VAL and fill in hash table on its
  * behalf.
  */
-static void pv_wait_node(struct mcs_spinlock *node)
+static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
 {
 	struct pv_node *pn = (struct pv_node *)node;
+	struct pv_node *pp = (struct pv_node *)prev;
+	int waitcnt = 0;
 	int loop;
+	bool wait_early;

-	for (;;) {
-		for (loop = SPIN_THRESHOLD; loop; loop--) {
+	/* waitcnt processing will be compiled out if !QUEUED_LOCK_STAT */
+	for (;; waitcnt++) {
+		for (wait_early = false, loop = SPIN_THRESHOLD; loop; loop--) {
 			if (READ_ONCE(node->locked))
 				return;
+			if (pv_wait_early(pp, loop)) {
+				wait_early = true;
+				break;
+			}
 			cpu_relax();
 		}

@@ -184,12 +311,17 @@ static void pv_wait_node(struct mcs_spinlock *node)
 		 */
 		smp_store_mb(pn->state, vcpu_halted);

-		if (!READ_ONCE(node->locked))
+		if (!READ_ONCE(node->locked)) {
+			qstat_inc(qstat_pv_wait_node, true);
+			qstat_inc(qstat_pv_wait_again, waitcnt);
+			qstat_inc(qstat_pv_wait_early, wait_early);
 			pv_wait(&pn->state, vcpu_halted);
+		}

 		/*
-		 * If pv_kick_node() changed us to vcpu_hashed, retain that value
-		 * so that pv_wait_head() knows to not also try to hash this lock.
+		 * If pv_kick_node() changed us to vcpu_hashed, retain that
+		 * value so that pv_wait_head_or_lock() knows to not also try
+		 * to hash this lock.
 		 */
 		cmpxchg(&pn->state, vcpu_halted, vcpu_running);
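The blocking above goes through the paravirt hooks pv_wait() and pv_kick(), provided per architecture/hypervisor (for example the KVM or Xen helpers on x86). Their contract is roughly:

	void pv_wait(u8 *ptr, u8 val);	/* halt this vCPU, but only if *ptr == val;
					 * spurious returns are possible, so callers
					 * must re-check their condition afterwards */
	void pv_kick(int cpu);		/* make the halted vCPU runnable again */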
@@ -200,6 +332,7 @@ static void pv_wait_node(struct mcs_spinlock *node)
 		 * So it is better to spin for a while in the hope that the
 		 * MCS lock will be released soon.
 		 */
+		qstat_inc(qstat_pv_spurious_wakeup, !READ_ONCE(node->locked));
 	}

 	/*
@@ -212,8 +345,9 @@ static void pv_wait_node(struct mcs_spinlock *node)
 /*
  * Called after setting next->locked = 1 when we're the lock owner.
  *
- * Instead of waking the waiters stuck in pv_wait_node() advance their state such
- * that they're waiting in pv_wait_head(), this avoids a wake/sleep cycle.
+ * Instead of waking the waiters stuck in pv_wait_node() advance their state
+ * such that they're waiting in pv_wait_head_or_lock(), this avoids a
+ * wake/sleep cycle.
  */
 static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
 {
@@ -242,14 +376,19 @@ static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
 }

 /*
- * Wait for l->locked to become clear; halt the vcpu after a short spin.
+ * Wait for l->locked to become clear and acquire the lock;
+ * halt the vcpu after a short spin.
  * __pv_queued_spin_unlock() will wake us.
+ *
+ * The current value of the lock will be returned for additional processing.
  */
-static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
+static u32
+pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
 {
 	struct pv_node *pn = (struct pv_node *)node;
 	struct __qspinlock *l = (void *)lock;
 	struct qspinlock **lp = NULL;
+	int waitcnt = 0;
 	int loop;

 	/*
@@ -259,12 +398,25 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
 	if (READ_ONCE(pn->state) == vcpu_hashed)
 		lp = (struct qspinlock **)1;

-	for (;;) {
+	for (;; waitcnt++) {
+		/*
+		 * Set correct vCPU state to be used by queue node wait-early
+		 * mechanism.
+		 */
+		WRITE_ONCE(pn->state, vcpu_running);
+
+		/*
+		 * Set the pending bit in the active lock spinning loop to
+		 * disable lock stealing before attempting to acquire the lock.
+		 */
+		set_pending(lock);
 		for (loop = SPIN_THRESHOLD; loop; loop--) {
-			if (!READ_ONCE(l->locked))
-				return;
+			if (trylock_clear_pending(lock))
+				goto gotlock;
 			cpu_relax();
 		}
+		clear_pending(lock);
+

 		if (!lp) { /* ONCE */
 			lp = pv_hash(lock, pn);
@@ -280,51 +432,50 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
 			 *
 			 * Matches the smp_rmb() in __pv_queued_spin_unlock().
 			 */
-			if (!cmpxchg(&l->locked, _Q_LOCKED_VAL, _Q_SLOW_VAL)) {
+			if (xchg(&l->locked, _Q_SLOW_VAL) == 0) {
 				/*
-				 * The lock is free and _Q_SLOW_VAL has never
-				 * been set. Therefore we need to unhash before
-				 * getting the lock.
+				 * The lock was free and now we own the lock.
+				 * Change the lock value back to _Q_LOCKED_VAL
+				 * and unhash the table.
 				 */
+				WRITE_ONCE(l->locked, _Q_LOCKED_VAL);
 				WRITE_ONCE(*lp, NULL);
-				return;
+				goto gotlock;
 			}
 		}
+		WRITE_ONCE(pn->state, vcpu_halted);
+		qstat_inc(qstat_pv_wait_head, true);
+		qstat_inc(qstat_pv_wait_again, waitcnt);
 		pv_wait(&l->locked, _Q_SLOW_VAL);

 		/*
 		 * The unlocker should have freed the lock before kicking the
 		 * CPU. So if the lock is still not free, it is a spurious
-		 * wakeup and so the vCPU should wait again after spinning for
-		 * a while.
+		 * wakeup or another vCPU has stolen the lock. The current
+		 * vCPU should spin again.
 		 */
+		qstat_inc(qstat_pv_spurious_wakeup, READ_ONCE(l->locked));
 	}

 	/*
-	 * Lock is unlocked now; the caller will acquire it without waiting.
-	 * As with pv_wait_node() we rely on the caller to do a load-acquire
-	 * for us.
+	 * The cmpxchg() or xchg() call before coming here provides the
+	 * acquire semantics for locking. The dummy ORing of _Q_LOCKED_VAL
+	 * here is to indicate to the compiler that the value will always
+	 * be nonzero to enable better code optimization.
 	 */
+gotlock:
+	return (u32)(atomic_read(&lock->val) | _Q_LOCKED_VAL);
 }

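pv_wait_head_or_lock() now returns the lock value (forced nonzero) instead of void so that the generic slowpath can tell that the lock was already acquired here. Roughly, the matching call site in queued_spin_lock_slowpath() in qspinlock.c (simplified, not shown in this diff) becomes:

	/*
	 * A nonzero return means the lock was taken inside
	 * pv_wait_head_or_lock(); skip the normal acquire loop.
	 */
	if ((val = pv_wait_head_or_lock(lock, node)))
		goto locked;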
 /*
- * PV version of the unlock function to be used in stead of
- * queued_spin_unlock().
+ * PV versions of the unlock fastpath and slowpath functions to be used
+ * instead of queued_spin_unlock().
  */
-__visible void __pv_queued_spin_unlock(struct qspinlock *lock)
+__visible void
+__pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
 {
 	struct __qspinlock *l = (void *)lock;
 	struct pv_node *node;
-	u8 locked;
-
-	/*
-	 * We must not unlock if SLOW, because in that case we must first
-	 * unhash. Otherwise it would be possible to have multiple @lock
-	 * entries, which would be BAD.
-	 */
-	locked = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
-	if (likely(locked == _Q_LOCKED_VAL))
-		return;

 	if (unlikely(locked != _Q_SLOW_VAL)) {
 		WARN(!debug_locks_silent,
@@ -338,7 +489,7 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
 	 * so we need a barrier to order the read of the node data in
 	 * pv_unhash *after* we've read the lock being _Q_SLOW_VAL.
 	 *
-	 * Matches the cmpxchg() in pv_wait_head() setting _Q_SLOW_VAL.
+	 * Matches the cmpxchg() in pv_wait_head_or_lock() setting _Q_SLOW_VAL.
 	 */
 	smp_rmb();
@@ -361,14 +512,35 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
 	 * vCPU is harmless other than the additional latency in completing
 	 * the unlock.
 	 */
+	qstat_inc(qstat_pv_kick_unlock, true);
 	pv_kick(node->cpu);
 }
+
 /*
  * Include the architecture specific callee-save thunk of the
  * __pv_queued_spin_unlock(). This thunk is put together with
- * __pv_queued_spin_unlock() near the top of the file to make sure
- * that the callee-save thunk and the real unlock function are close
- * to each other sharing consecutive instruction cachelines.
+ * __pv_queued_spin_unlock() to make the callee-save thunk and the real unlock
+ * function close to each other sharing consecutive instruction cachelines.
+ * Alternatively, an architecture specific version of __pv_queued_spin_unlock()
+ * can be defined.
  */
 #include <asm/qspinlock_paravirt.h>

+#ifndef __pv_queued_spin_unlock
+__visible void __pv_queued_spin_unlock(struct qspinlock *lock)
+{
+	struct __qspinlock *l = (void *)lock;
+	u8 locked;
+
+	/*
+	 * We must not unlock if SLOW, because in that case we must first
+	 * unhash. Otherwise it would be possible to have multiple @lock
+	 * entries, which would be BAD.
+	 */
+	locked = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0);
+	if (likely(locked == _Q_LOCKED_VAL))
+		return;
+
+	__pv_queued_spin_unlock_slowpath(lock, locked);
+}
+#endif /* __pv_queued_spin_unlock */
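The new #ifndef guard lets an architecture ship its own unlock fastpath (typically hand-written assembly that calls the C slowpath above only when needed) and suppress this fallback by defining the __pv_queued_spin_unlock macro in its <asm/qspinlock_paravirt.h>. Purely for illustration, loosely modeled on what a 64-bit x86 header might do and not part of this patch:

	#ifdef CONFIG_64BIT
	PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath);
	#define __pv_queued_spin_unlock	__pv_queued_spin_unlock
	/*
	 * ...hand-written __pv_queued_spin_unlock: cmpxchg l->locked from
	 * _Q_LOCKED_VAL to 0 and, only if that fails (i.e. _Q_SLOW_VAL was
	 * set), branch to the callee-save thunk around
	 * __pv_queued_spin_unlock_slowpath()...
	 */
	#endif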