@@ -267,6 +267,63 @@ static __always_inline u32 __pv_wait_head_or_lock(struct qspinlock *lock,
#define queued_spin_lock_slowpath native_queued_spin_lock_slowpath
#endif

+/*
+ * Various notes on spin_is_locked() and spin_unlock_wait(), which are
+ * 'interesting' functions:
+ *
+ * PROBLEM: some architectures have an interesting issue with atomic ACQUIRE
+ * operations in that the ACQUIRE applies to the LOAD _not_ the STORE (ARM64,
+ * PPC). Also qspinlock has a similar issue per construction, the setting of
+ * the locked byte can be unordered acquiring the lock proper.
+ *
+ * This gets to be 'interesting' in the following cases, where the /should/s
+ * end up false because of this issue.
+ *
+ *
+ * CASE 1:
+ *
+ * So the spin_is_locked() correctness issue comes from something like:
+ *
+ *   CPU0                              CPU1
+ *
+ *   global_lock();                    local_lock(i)
+ *     spin_lock(&G)                     spin_lock(&L[i])
+ *     for (i)                           if (!spin_is_locked(&G)) {
+ *       spin_unlock_wait(&L[i]);          smp_acquire__after_ctrl_dep();
+ *                                          return;
+ *                                        }
+ *                                        // deal with fail
+ *
+ * Where it is important CPU1 sees G locked or CPU0 sees L[i] locked such
+ * that there is exclusion between the two critical sections.
+ *
+ * The load from spin_is_locked(&G) /should/ be constrained by the ACQUIRE from
+ * spin_lock(&L[i]), and similarly the load(s) from spin_unlock_wait(&L[i])
+ * /should/ be constrained by the ACQUIRE from spin_lock(&G).
+ *
+ * Similarly, later stuff is constrained by the ACQUIRE from CTRL+RMB.
+ *
+ *
+ * CASE 2:
+ *
+ * For spin_unlock_wait() there is a second correctness issue, namely:
+ *
+ *   CPU0                              CPU1
+ *
+ *   flag = set;
+ *   smp_mb();                         spin_lock(&l)
+ *   spin_unlock_wait(&l);             if (!flag)
+ *                                       // add to lockless list
+ *                                     spin_unlock(&l);
+ *   // iterate lockless list
+ *
+ * Which wants to ensure that CPU1 will stop adding bits to the list and CPU0
+ * will observe the last entry on the list (if spin_unlock_wait() had ACQUIRE
+ * semantics etc..)
+ *
+ * Where flag /should/ be ordered against the locked store of l.
+ */
+
/*
 * queued_spin_lock_slowpath() can (load-)ACQUIRE the lock before
 * issuing an _unordered_ store to set _Q_LOCKED_VAL.
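
Reader's aid, not part of the patch: a minimal, hypothetical sketch of the CASE 2 usage pattern the new comment describes, assuming a kernel of this era where spin_unlock_wait() still exists. The names flag, lockless_list, maybe_add() and close_and_iterate() are illustrative only; only the flag/smp_mb()/spin_unlock_wait() handshake is taken from the comment above.

#include <linux/spinlock.h>
#include <linux/llist.h>

static DEFINE_SPINLOCK(l);
static bool flag;
static LLIST_HEAD(lockless_list);

/* CPU1 side: only publish a node while @flag has not been observed set. */
static void maybe_add(struct llist_node *n)
{
	spin_lock(&l);
	if (!READ_ONCE(flag))
		llist_add(n, &lockless_list);
	spin_unlock(&l);
}

/* CPU0 side: set @flag, wait out any current lock holder, then walk the list. */
static void close_and_iterate(void)
{
	struct llist_node *n;

	WRITE_ONCE(flag, true);
	smp_mb();		/* order the @flag store before the lock-word loads below */
	spin_unlock_wait(&l);	/* the ordering discussed in CASE 2 above */

	llist_for_each(n, llist_del_all(&lockless_list)) {
		/* handle node @n */
	}
}

Per the comment, the pattern only excludes further additions if the @flag store is ordered against CPU1's locked store on &l, which is exactly the property spin_unlock_wait() and the unordered locked-byte store make subtle.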