9 years ago · 1f03e8d291
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -305,21 +305,34 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s
 
				 })
			
 
				 
			
 
				 /**
			
 
				- * smp_cond_acquire() - Spin wait for cond with ACQUIRE ordering
			
 
				+ * smp_cond_load_acquire() - (Spin) wait for cond with ACQUIRE ordering
			
 
				+ * @ptr: pointer to the variable to wait on
			
 
				  * @cond: boolean expression to wait for
			
 
				  *
			
 
				  * Equivalent to using smp_load_acquire() on the condition variable but employs
			
 
				  * the control dependency of the wait to reduce the barrier on many platforms.
			
 
				  *
			
 
				+ * Due to C lacking lambda expressions we load the value of *ptr into a
			
 
				+ * pre-named variable @VAL to be used in @cond.
			
 
				+ *
			
 
				  * The control dependency provides a LOAD->STORE order, the additional RMB
			
 
				  * provides LOAD->LOAD order, together they provide LOAD->{LOAD,STORE} order,
			
 
				  * aka. ACQUIRE.
			
 
				  */
			
 
				-#define smp_cond_acquire(cond)	do {		\
			
 
				-	while (!(cond))				\
			
 
				-		cpu_relax();			\
			
 
				-	smp_rmb(); /* ctrl + rmb := acquire */	\
			
 
				-} while (0)
			
 
				+#ifndef smp_cond_load_acquire
			
 
				+#define smp_cond_load_acquire(ptr, cond_expr) ({		\
			
 
				+	typeof(ptr) __PTR = (ptr);				\
			
 
				+	typeof(*ptr) VAL;					\
			
 
				+	for (;;) {						\
			
 
				+		VAL = READ_ONCE(*__PTR);			\
			
 
				+		if (cond_expr)					\
			
 
				+			break;					\
			
 
				+		cpu_relax();					\
			
 
				+	}							\
			
 
				+	smp_rmb(); /* ctrl + rmb := acquire */			\
			
 
				+	VAL;							\
			
 
				+})
			
 
				+#endif
			
 
				 
			
 
				 #endif /* __KERNEL__ */
			
 
				 
			
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -475,7 +475,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 
				 	 * sequentiality; this is because not all clear_pending_set_locked()
			
 
				 	 * implementations imply full barriers.
			
 
				 	 */
			
 
				-	smp_cond_acquire(!(atomic_read(&lock->val) & _Q_LOCKED_MASK));
			
 
				+	smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_MASK));
			
 
				 
			
 
				 	/*
			
 
				 	 * take ownership and clear the pending bit.
			
@@ -562,7 +562,7 @@ queue:
 
				 	 *
			
 
				 	 * The PV pv_wait_head_or_lock function, if active, will acquire
			
 
				 	 * the lock and return a non-zero value. So we have to skip the
			
 
				-	 * smp_cond_acquire() call. As the next PV queue head hasn't been
			
 
				+	 * smp_cond_load_acquire() call. As the next PV queue head hasn't been
			
 
				 	 * designated yet, there is no way for the locked value to become
			
 
				 	 * _Q_SLOW_VAL. So both the set_locked() and the
			
 
				 	 * atomic_cmpxchg_relaxed() calls will be safe.
			
@@ -573,7 +573,7 @@ queue:
 
				 	if ((val = pv_wait_head_or_lock(lock, node)))
			
 
				 		goto locked;
			
 
				 
			
 
				-	smp_cond_acquire(!((val = atomic_read(&lock->val)) & _Q_LOCKED_PENDING_MASK));
			
 
				+	val = smp_cond_load_acquire(&lock->val.counter, !(VAL & _Q_LOCKED_PENDING_MASK));
			
 
				 
			
 
				 locked:
			
 
				 	/*
			
@@ -593,9 +593,9 @@ locked:
 
				 			break;
			
 
				 		}
			
 
				 		/*
			
 
				-		 * The smp_cond_acquire() call above has provided the necessary
			
 
				-		 * acquire semantics required for locking. At most two
			
 
				-		 * iterations of this loop may be ran.
			
 
				+		 * The smp_cond_load_acquire() call above has provided the
			
 
				+		 * necessary acquire semantics required for locking. At most
			
 
				+		 * two iterations of this loop may be ran.
			
 
				 		 */
			
 
				 		old = atomic_cmpxchg_relaxed(&lock->val, val, _Q_LOCKED_VAL);
			
 
				 		if (old == val)
			
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1935,7 +1935,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
 
				  * chain to provide order. Instead we do:
			
 
				  *
			
 
				  *   1) smp_store_release(X->on_cpu, 0)
			
 
				- *   2) smp_cond_acquire(!X->on_cpu)
			
 
				+ *   2) smp_cond_load_acquire(!X->on_cpu)
			
 
				  *
			
 
				  * Example:
			
 
				  *
			
@@ -1946,7 +1946,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
 
				  *   sched-out X
			
 
				  *   smp_store_release(X->on_cpu, 0);
			
 
				  *
			
 
				- *                    smp_cond_acquire(!X->on_cpu);
			
 
				+ *                    smp_cond_load_acquire(&X->on_cpu, !VAL);
			
 
				  *                    X->state = WAKING
			
 
				  *                    set_task_cpu(X,2)
			
 
				  *
			
@@ -1972,7 +1972,7 @@ static void ttwu_queue(struct task_struct *p, int cpu, int wake_flags)
 
				  * This means that any means of doing remote wakeups must order the CPU doing
			
 
				  * the wakeup against the CPU the task is going to end up running on. This,
			
 
				  * however, is already required for the regular Program-Order guarantee above,
			
 
				- * since the waking CPU is the one issueing the ACQUIRE (smp_cond_acquire).
			
 
				+ * since the waking CPU is the one issueing the ACQUIRE (smp_cond_load_acquire).
			
 
				  *
			
 
				  */
			
 
				 
			
@@ -2045,7 +2045,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 
				 	 * This ensures that tasks getting woken will be fully ordered against
			
 
				 	 * their previous state and preserve Program Order.
			
 
				 	 */
			
 
				-	smp_cond_acquire(!p->on_cpu);
			
 
				+	smp_cond_load_acquire(&p->on_cpu, !VAL);
			
 
				 
			
 
				 	p->sched_contributes_to_load = !!task_contributes_to_load(p);
			
 
				 	p->state = TASK_WAKING;
			
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1113,7 +1113,7 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 
				 	 * In particular, the load of prev->state in finish_task_switch() must
			
 
				 	 * happen before this.
			
 
				 	 *
			
 
				-	 * Pairs with the smp_cond_acquire() in try_to_wake_up().
			
 
				+	 * Pairs with the smp_cond_load_acquire() in try_to_wake_up().
			
 
				 	 */
			
 
				 	smp_store_release(&prev->on_cpu, 0);
			
 
				 #endif
			
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -107,7 +107,7 @@ void __init call_function_init(void)
 
				  */
			
 
				 static __always_inline void csd_lock_wait(struct call_single_data *csd)
			
 
				 {
			
 
				-	smp_cond_acquire(!(csd->flags & CSD_FLAG_LOCK));
			
 
				+	smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK));
			
 
				 }
			
 
				 
			
 
				 static __always_inline void csd_lock(struct call_single_data *csd)