@@ -22,9 +22,14 @@
 
 #define _Q_SLOW_VAL	(3U << _Q_LOCKED_OFFSET)
 
+/*
+ * Queue node uses: vcpu_running & vcpu_halted.
+ * Queue head uses: vcpu_running & vcpu_hashed.
+ */
 enum vcpu_state {
 	vcpu_running = 0,
-	vcpu_halted,
+	vcpu_halted,		/* Used only in pv_wait_node */
+	vcpu_hashed,		/* = pv_hash'ed + vcpu_halted */
 };
 
 struct pv_node {
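
Taken together, the two new states form a tiny per-node state machine: a waiter parks itself running -> halted; afterwards either it wakes and moves back to running, or the lock holder promotes it halted -> hashed, and that promotion must win over the wakeup path. Below is a minimal userspace sketch of those transitions, using C11 atomics in place of the kernel's smp_store_mb()/cmpxchg(); the function names are illustrative, not from the patch.

#include <stdatomic.h>
#include <stdio.h>

enum vcpu_state { vcpu_running = 0, vcpu_halted, vcpu_hashed };

/* Models smp_store_mb(): a store with a full barrier. */
static void park(_Atomic int *state)
{
	atomic_store(state, vcpu_halted);
}

/* Lock holder (pv_kick_node): promote halted -> hashed. */
static int promote(_Atomic int *state)
{
	int old = vcpu_halted;
	return atomic_compare_exchange_strong(state, &old, vcpu_hashed);
}

/* Woken waiter (pv_wait_node): halted -> running, but a hashed
 * promotion must survive so pv_wait_head() can see it. */
static void wake(_Atomic int *state)
{
	int old = vcpu_halted;
	atomic_compare_exchange_strong(state, &old, vcpu_running);
}

int main(void)
{
	_Atomic int state = vcpu_running;

	park(&state);		/* running -> halted */
	promote(&state);	/* holder wins: halted -> hashed */
	wake(&state);		/* no-op: state is hashed, not halted */
	printf("final state = %d (2 == vcpu_hashed)\n",
	       (int)atomic_load(&state));
	return 0;
}
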
@@ -153,7 +158,8 @@ static void pv_init_node(struct mcs_spinlock *node)
 
 /*
  * Wait for node->locked to become true, halt the vcpu after a short spin.
- * pv_kick_node() is used to wake the vcpu again.
+ * pv_kick_node() is used to set _Q_SLOW_VAL and fill in the hash table on
+ * its behalf.
  */
 static void pv_wait_node(struct mcs_spinlock *node)
 {
@@ -172,9 +178,9 @@ static void pv_wait_node(struct mcs_spinlock *node)
 		 *
 		 * [S] pn->state = vcpu_halted    [S] next->locked = 1
 		 *     MB                             MB
-		 * [L] pn->locked                [RmW] pn->state = vcpu_running
+		 * [L] pn->locked                [RmW] pn->state = vcpu_hashed
 		 *
-		 * Matches the xchg() from pv_kick_node().
+		 * Matches the cmpxchg() from pv_kick_node().
 		 */
 		smp_store_mb(pn->state, vcpu_halted);
 
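
The diagram above is the classic store-buffering pattern: each side stores to its own variable, executes a full barrier, then loads the other side's variable, so at least one of the two loads must observe the other side's store. A hedged userspace rendering of the two sides (seq_cst atomics stand in for the kernel's MB; the struct and function names are illustrative):

#include <stdatomic.h>
#include <stdbool.h>

enum { vcpu_running, vcpu_halted, vcpu_hashed };

struct node {
	_Atomic int state;	/* this waiter's vcpu_state     */
	_Atomic int locked;	/* set by the MCS lock hand-off */
};

/* Waiter: [S] state = halted; MB; [L] locked. Sleep only if the
 * hand-off has not happened yet. */
static bool waiter_should_sleep(struct node *n)
{
	atomic_store(&n->state, vcpu_halted);
	return !atomic_load(&n->locked);
}

/* Holder: [S] locked = 1; MB; [RmW] state halted -> hashed. */
static bool holder_promoted(struct node *n)
{
	int old = vcpu_halted;

	atomic_store(&n->locked, 1);
	return atomic_compare_exchange_strong(&n->state, &old, vcpu_hashed);
}

The barrier pairing means the two sides cannot both miss each other: if the waiter did park, the holder's cmpxchg() is guaranteed to find vcpu_halted, so the node gets hashed and the eventual unlocker will kick it; if the waiter saw locked == 1 first, the cmpxchg() fails and nothing needs waking. Either way no wakeup is lost.
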
@@ -182,9 +188,10 @@ static void pv_wait_node(struct mcs_spinlock *node)
 			pv_wait(&pn->state, vcpu_halted);
 
 		/*
-		 * Reset the vCPU state to avoid unncessary CPU kicking
+		 * If pv_kick_node() changed us to vcpu_hashed, retain that
+		 * value so that pv_wait_head() knows not to hash this lock again.
 		 */
-		WRITE_ONCE(pn->state, vcpu_running);
+		cmpxchg(&pn->state, vcpu_halted, vcpu_running);
 
 		/*
 		 * If the locked flag is still not set after wakeup, it is a
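
Why the blind WRITE_ONCE() had to become a cmpxchg(): pv_kick_node() may have moved this node to vcpu_hashed while it slept, and overwriting that with vcpu_running would make pv_wait_head() hash the lock a second time. A small illustrative check, with C11 atomics standing in for the kernel macro:

#include <stdatomic.h>
#include <assert.h>

enum { vcpu_running, vcpu_halted, vcpu_hashed };

int main(void)
{
	/* Suppose pv_kick_node() promoted us while we were parked. */
	_Atomic int state = vcpu_hashed;
	int old = vcpu_halted;

	/* Old code: WRITE_ONCE(pn->state, vcpu_running) would clobber
	 * the promotion and pv_wait_head() would hash the lock twice. */

	/* New code: only halted -> running; vcpu_hashed is retained. */
	atomic_compare_exchange_strong(&state, &old, vcpu_running);
	assert(atomic_load(&state) == vcpu_hashed);
	return 0;
}
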
@@ -194,6 +201,7 @@ static void pv_wait_node(struct mcs_spinlock *node)
 		 * MCS lock will be released soon.
 		 */
 	}
+
 	/*
 	 * By now our node->locked should be 1 and our caller will not actually
 	 * spin-wait for it. We do however rely on our caller to do a
@@ -202,24 +210,35 @@ static void pv_wait_node(struct mcs_spinlock *node)
 }
 
 /*
- * Called after setting next->locked = 1, used to wake those stuck in
- * pv_wait_node().
+ * Called after setting next->locked = 1 when we're the lock owner.
+ *
+ * Instead of waking the waiters stuck in pv_wait_node(), advance their state
+ * so that they wait in pv_wait_head(); this avoids a wake/sleep cycle.
  */
-static void pv_kick_node(struct mcs_spinlock *node)
+static void pv_kick_node(struct qspinlock *lock, struct mcs_spinlock *node)
 {
 	struct pv_node *pn = (struct pv_node *)node;
+	struct __qspinlock *l = (void *)lock;
 
 	/*
-	 * Note that because node->locked is already set, this actual
-	 * mcs_spinlock entry could be re-used already.
+	 * If the vCPU is indeed halted, advance its state to match that of
+	 * pv_wait_node(). If OTOH this fails, the vCPU was running and will
+	 * observe its next->locked value and advance itself.
 	 *
-	 * This should be fine however, kicking people for no reason is
-	 * harmless.
+	 * Matches with the smp_store_mb() and cmpxchg() in pv_wait_node().
+	 */
+	if (cmpxchg(&pn->state, vcpu_halted, vcpu_hashed) != vcpu_halted)
+		return;
+
+	/*
+	 * Put the lock into the hash table and set the _Q_SLOW_VAL.
 	 *
-	 * See the comment in pv_wait_node().
+	 * As this is the same vCPU that will check the _Q_SLOW_VAL value and
+	 * the hash table later on at unlock time, no atomic instruction is
+	 * needed.
 	 */
-	if (xchg(&pn->state, vcpu_running) == vcpu_halted)
-		pv_kick(pn->cpu);
+	WRITE_ONCE(l->locked, _Q_SLOW_VAL);
+	(void)pv_hash(lock, pn);
 }
 
 /*
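
A toy model of the rewritten flow may help: the holder attempts the halted -> hashed promotion; if it fails, the waiter is awake and will advance itself, and only on success does the holder write _Q_SLOW_VAL and record the lock. The hash table below is a stand-in for the kernel's pv_hash() machinery, not its real implementation, and all names are illustrative:

#include <stdatomic.h>
#include <stddef.h>

#define _Q_SLOW_VAL	3
#define NBUCKETS	64

enum { vcpu_running, vcpu_halted, vcpu_hashed };

struct pv_node   { _Atomic int state; int cpu; };
struct qspinlock { _Atomic unsigned char locked; };

/* Toy open-addressed lock -> node table. */
static struct { struct qspinlock *lock; struct pv_node *node; } tab[NBUCKETS];

static void toy_pv_hash(struct qspinlock *lock, struct pv_node *node)
{
	size_t i = ((size_t)lock >> 4) % NBUCKETS;

	while (tab[i].lock)		/* linear probe to a free slot */
		i = (i + 1) % NBUCKETS;
	tab[i].lock = lock;
	tab[i].node = node;
}

static void toy_pv_kick_node(struct qspinlock *lock, struct pv_node *pn)
{
	int old = vcpu_halted;

	/* Race with the waiter's own halted -> running cmpxchg():
	 * exactly one of the two RmWs can win. */
	if (!atomic_compare_exchange_strong(&pn->state, &old, vcpu_hashed))
		return;		/* waiter is running; it advances itself */

	/* We won: flag the slow path and hash on the waiter's behalf.
	 * In the kernel, the same vCPU reads both back at unlock time,
	 * which is why plain stores suffice there. */
	atomic_store(&lock->locked, _Q_SLOW_VAL);
	toy_pv_hash(lock, pn);
}
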
@@ -233,6 +252,13 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
 	struct qspinlock **lp = NULL;
 	int loop;
 
+	/*
+	 * If pv_kick_node() already advanced our state, we don't need to
+	 * insert ourselves into the hash table anymore.
+	 */
+	if (READ_ONCE(pn->state) == vcpu_hashed)
+		lp = (struct qspinlock **)1;
+
 	for (;;) {
 		for (loop = SPIN_THRESHOLD; loop; loop--) {
 			if (!READ_ONCE(l->locked))
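
The odd-looking (struct qspinlock **)1 is just a non-NULL sentinel: lp only ever gates the "hash once" branch below and is never dereferenced on this path, so any non-NULL value means "the hash entry already exists". In miniature (a hypothetical demo, not patch code):

#include <stdio.h>

struct qspinlock;

static void head_wait(int already_hashed)
{
	struct qspinlock **lp = NULL;

	if (already_hashed)			/* pn->state == vcpu_hashed */
		lp = (struct qspinlock **)1;	/* sentinel, never dereferenced */

	if (!lp)
		puts("hash the lock ourselves");
	else
		puts("pv_kick_node() already hashed it");
}

int main(void)
{
	head_wait(0);
	head_wait(1);
	return 0;
}
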
@@ -240,9 +266,10 @@ static void pv_wait_head(struct qspinlock *lock, struct mcs_spinlock *node)
 			cpu_relax();
 		}
 
-		WRITE_ONCE(pn->state, vcpu_halted);
 		if (!lp) { /* ONCE */
+			WRITE_ONCE(pn->state, vcpu_hashed);
 			lp = pv_hash(lock, pn);
+
 			/*
 			 * We must hash before setting _Q_SLOW_VAL, such that
 			 * when we observe _Q_SLOW_VAL in __pv_queued_spin_unlock()
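
Note the contrast with pv_kick_node(): here the reader of _Q_SLOW_VAL is a different vCPU (the unlocker), so the hash entry has to be globally visible before the flag is. A sketch of that publish/consume pairing under release/acquire semantics; the C11 orderings stand in for the kernel's stronger barriers, and all names are illustrative:

#include <stdatomic.h>
#include <stddef.h>

#define _Q_SLOW_VAL 3

struct pv_node;
struct qspinlock { _Atomic unsigned char locked; };

/* Head waiter: publish the hash entry, then raise the flag. */
static void publish(struct qspinlock *lock,
		    _Atomic(struct pv_node *) *slot, struct pv_node *pn)
{
	atomic_store_explicit(slot, pn, memory_order_relaxed);
	/* Release: the entry is visible before _Q_SLOW_VAL is. */
	atomic_store_explicit(&lock->locked, _Q_SLOW_VAL,
			      memory_order_release);
}

/* Unlocker: observing _Q_SLOW_VAL guarantees the entry is there. */
static struct pv_node *lookup(struct qspinlock *lock,
			      _Atomic(struct pv_node *) *slot)
{
	if (atomic_load_explicit(&lock->locked,
				 memory_order_acquire) != _Q_SLOW_VAL)
		return NULL;
	return atomic_load_explicit(slot, memory_order_relaxed);
}
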
@@ -333,8 +360,11 @@ __visible void __pv_queued_spin_unlock(struct qspinlock *lock)
 	/*
 	 * At this point the memory pointed at by lock can be freed/reused,
 	 * however we can still use the pv_node to kick the CPU.
+	 * The other vCPU may not really be halted, but kicking an active
+	 * vCPU is harmless, apart from the small additional latency of
+	 * completing the unlock.
 	 */
-	if (READ_ONCE(node->state) == vcpu_halted)
+	if (READ_ONCE(node->state) == vcpu_hashed)
 		pv_kick(node->cpu);
 }
 /*
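
For completeness, the tail of the unlock path this hunk touches: after the lock word is released (and possibly freed or reused), the unlocker may still read the hashed pv_node and kick its vCPU. A toy version under the same illustrative names as the sketches above:

#include <stdatomic.h>
#include <stdio.h>

enum { vcpu_running, vcpu_halted, vcpu_hashed };

struct pv_node { _Atomic int state; int cpu; };

static void toy_pv_kick(int cpu)
{
	printf("kick vcpu %d\n", cpu);	/* stands in for a hypercall */
}

/* The lock word may already be gone, but the hashed pv_node is
 * still valid; only nodes that reached vcpu_hashed have a hash
 * entry and therefore a kick pending. */
static void toy_unlock_tail(struct pv_node *node)
{
	/* The waiter may have woken on its own and be running again;
	 * kicking a running vCPU merely adds a little latency. */
	if (atomic_load(&node->state) == vcpu_hashed)
		toy_pv_kick(node->cpu);
}

int main(void)
{
	struct pv_node n = { vcpu_hashed, 7 };

	toy_unlock_tail(&n);
	return 0;
}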