@@ -24,6 +24,7 @@
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
 #include <linux/mutex.h>
+#include <asm/byteorder.h>
 #include <asm/qspinlock.h>
 
 /*
@@ -56,6 +57,10 @@
  * node; whereby avoiding the need to carry a node from lock to unlock, and
  * preserving existing lock API. This also makes the unlock code simpler and
  * faster.
+ *
+ * N.B. The current implementation only supports architectures that allow
+ * atomic operations on smaller 8-bit and 16-bit data types.
+ *
  */
 
 #include "mcs_spinlock.h"
@@ -96,6 +101,62 @@ static inline struct mcs_spinlock *decode_tail(u32 tail)
 
 #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
 
+/*
+ * By using the whole 2nd least significant byte for the pending bit, we
+ * can allow better optimization of the lock acquisition for the pending
+ * bit holder.
+ */
+#if _Q_PENDING_BITS == 8
+
+struct __qspinlock {
+	union {
+		atomic_t val;
+		struct {
+#ifdef __LITTLE_ENDIAN
+			u16	locked_pending;
+			u16	tail;
+#else
+			u16	tail;
+			u16	locked_pending;
+#endif
+		};
+	};
+};
+
+/**
+ * clear_pending_set_locked - take ownership and clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,0 -> *,0,1
+ *
+ * Lock stealing is not allowed if this function is used.
+ */
+static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
+{
+	struct __qspinlock *l = (void *)lock;
+
+	WRITE_ONCE(l->locked_pending, _Q_LOCKED_VAL);
+}
+
+/*
+ * xchg_tail - Put in the new queue tail code word & retrieve previous one
+ * @lock : Pointer to queued spinlock structure
+ * @tail : The new queue tail code word
+ * Return: The previous queue tail code word
+ *
+ * xchg(lock, tail)
+ *
+ * p,*,* -> n,*,* ; prev = xchg(lock, node)
+ */
+static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
+{
+	struct __qspinlock *l = (void *)lock;
+
+	return (u32)xchg(&l->tail, tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
+}
+
+#else /* _Q_PENDING_BITS == 8 */
+
 /**
  * clear_pending_set_locked - take ownership and clear the pending bit.
  * @lock: Pointer to queued spinlock structure
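
(Illustrative aside, not part of the patch.) The hunk above relies on the lock word layout used when _Q_PENDING_BITS == 8: bits 0-7 hold the locked byte, bits 8-15 the pending byte, and bits 16-31 the tail, so locked+pending form one naturally aligned 16-bit halfword. Below is a minimal user-space sketch of that overlay, with made-up names (demo_qspinlock, LOCKED_VAL, PENDING_VAL, TAIL_OFFSET) standing in for the kernel's struct __qspinlock and _Q_* constants, a plain store standing in for WRITE_ONCE(), and little-endian byte order assumed:

/* Illustration only -- mirrors the _Q_PENDING_BITS == 8 layout above. */
#include <stdint.h>
#include <stdio.h>

#define LOCKED_VAL	(1U << 0)	/* bit 0 of the locked byte */
#define PENDING_VAL	(1U << 8)	/* bit 0 of the pending byte */
#define TAIL_OFFSET	16

union demo_qspinlock {
	uint32_t val;
	struct {			/* little-endian layout assumed */
		uint16_t locked_pending;
		uint16_t tail;
	};
};

int main(void)
{
	/* pending set, lock free, some CPU encoded in the tail */
	union demo_qspinlock l = { .val = PENDING_VAL | (3U << TAIL_OFFSET) };

	/* one 16-bit store clears pending and sets locked without touching
	 * the tail halfword: *,1,0 -> *,0,1 */
	l.locked_pending = LOCKED_VAL;

	printf("val = 0x%08x\n", (unsigned)l.val);	/* prints 0x00030001 */
	return 0;
}

Because the halfword can be written as a unit, clear_pending_set_locked() becomes a single 16-bit store rather than the cmpxchg loop of the generic version kept under the #else branch, and xchg_tail() can exchange the tail without disturbing the locked/pending bytes.
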
@@ -131,6 +192,7 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
 	}
 	return old;
 }
+#endif /* _Q_PENDING_BITS == 8 */
 
 /**
  * queued_spin_lock_slowpath - acquire the queued spinlock
@@ -205,8 +267,13 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	 * we're pending, wait for the owner to go away.
 	 *
 	 * *,1,1 -> *,1,0
+	 *
+	 * this wait loop must be a load-acquire such that we match the
+	 * store-release that clears the locked bit and create lock
+	 * sequentiality; this is because not all clear_pending_set_locked()
+	 * implementations imply full barriers.
 	 */
-	while ((val = atomic_read(&lock->val)) & _Q_LOCKED_MASK)
+	while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_MASK)
		cpu_relax();
 
 	/*
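
(Illustrative aside, not part of the patch.) The comment added in the hunk above explains why the pending-bit holder's wait loop must be a load-acquire. A C11 sketch of the same pairing follows, with hypothetical names (demo_lock, demo_unlock, demo_wait_for_owner) and C11 atomics standing in for the kernel's smp_load_acquire()/smp_store_release():

/* Sketch only -- C11 stand-ins for the kernel primitives. */
#include <stdatomic.h>
#include <stdint.h>

#define DEMO_LOCKED_MASK	0xffU

static _Atomic uint32_t demo_lock;

static void demo_unlock(void)
{
	/* clear only the locked byte, with release semantics; the kernel
	 * does this with a byte-sized smp_store_release() */
	atomic_fetch_and_explicit(&demo_lock, ~DEMO_LOCKED_MASK,
				  memory_order_release);
}

static uint32_t demo_wait_for_owner(void)
{
	uint32_t val;

	/* the acquire load pairs with the release store in demo_unlock(),
	 * so the previous owner's critical section is ordered before ours */
	while ((val = atomic_load_explicit(&demo_lock, memory_order_acquire)) &
	       DEMO_LOCKED_MASK)
		;	/* cpu_relax() equivalent */

	return val;
}

Without the acquire, a weakly ordered CPU could hoist loads from the new critical section above the spin loop and observe data from before the previous owner's release.
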