@@ -130,6 +130,42 @@ do { \
#define smp_mb__before_spinlock() smp_wmb()
#endif
+/*
+ * This barrier must provide two things:
+ *
+ *   - it must guarantee a STORE before the spin_lock() is ordered against a
+ *     LOAD after it, see the comments at its two usage sites.
+ *
+ *   - it must ensure the critical section is RCsc.
+ *
+ * The latter is important for cases where we observe values written by other
+ * CPUs in spin-loops, without barriers, while being subject to scheduling.
+ *
+ * CPU0                 CPU1                 CPU2
+ *
+ *                      for (;;) {
+ *                        if (READ_ONCE(X))
+ *                          break;
+ *                      }
+ * X=1
+ *                      <sched-out>
+ *                                           <sched-in>
+ *                                           r = X;
+ *
+ * Without transitivity it could be that CPU1 observes X!=0 and breaks out of
+ * the loop, we then get migrated, and CPU2 still sees X==0.
+ *
+ * Since most load-store architectures implement ACQUIRE with an smp_mb() after
+ * the LL/SC loop, they need no further barriers. Similarly, all our TSO
+ * architectures imply an smp_mb() for each atomic instruction and equally don't
+ * need more.
+ *
+ * Architectures that can implement ACQUIRE with something weaker than a full
+ * barrier need to take care.
+ */
+#ifndef smp_mb__after_spinlock
+#define smp_mb__after_spinlock() do { } while (0)
+#endif
+
/**
* raw_spin_unlock_wait - wait until the spinlock gets unlocked
* @lock: the spinlock in question.
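
To make the first requirement above concrete, here is a minimal sketch of the
store-before-lock / load-after-lock pattern. The names (guard_lock, flag_x,
flag_y, publisher()) are invented for illustration and are not the patch's
actual call sites, which the comment says live at two places in the scheduler.
A plain spin_lock() is only an ACQUIRE and does not order an earlier STORE
against a later LOAD; smp_mb__after_spinlock() upgrades it to a full barrier:

#include <linux/spinlock.h>

/* Illustrative only: made-up names, not the real usage sites. */
static DEFINE_SPINLOCK(guard_lock);
static int flag_x, flag_y;

static int publisher(void)
{
	int observed;

	WRITE_ONCE(flag_x, 1);		/* STORE issued before the lock */
	spin_lock(&guard_lock);
	smp_mb__after_spinlock();	/* ACQUIRE alone would not order the
					 * STORE above against the LOAD below */
	observed = READ_ONCE(flag_y);	/* LOAD issued after the lock */
	spin_unlock(&guard_lock);

	return observed;
}

If a second CPU runs the mirror-image sequence with the roles of flag_x and
flag_y swapped, the full barriers ensure at least one of the two CPUs observes
the other's STORE, which the ACQUIRE of spin_lock() alone would not guarantee.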
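
Conversely, an architecture whose lock ACQUIRE is weaker than a full barrier is
expected to override the default no-op; PowerPC is the usual example of such an
architecture. A sketch of what such an override would look like in the
architecture's own header (the exact header, e.g. asm/barrier.h, is an
assumption and is not part of this hunk):

/*
 * Illustrative per-architecture override: when ACQUIRE is weaker than a
 * full barrier, the architecture must supply the smp_mb() itself.
 */
#define smp_mb__after_spinlock()	smp_mb()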