@@ -24,6 +24,7 @@
 #include <linux/percpu.h>
 #include <linux/hardirq.h>
 #include <linux/mutex.h>
+#include <asm/byteorder.h>
 #include <asm/qspinlock.h>
 
 /*
@@ -56,6 +57,10 @@
  * node; whereby avoiding the need to carry a node from lock to unlock, and
  * preserving existing lock API. This also makes the unlock code simpler and
  * faster.
+ *
+ * N.B. The current implementation only supports architectures that allow
+ * atomic operations on smaller 8-bit and 16-bit data types.
+ *
  */
 
 #include "mcs_spinlock.h"
@@ -96,6 +101,62 @@ static inline struct mcs_spinlock *decode_tail(u32 tail)
 
 #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
 
+/*
+ * By using the whole 2nd least significant byte for the pending bit, we
+ * can allow better optimization of the lock acquisition for the pending
+ * bit holder.
+ */
+#if _Q_PENDING_BITS == 8
+
+struct __qspinlock {
+	union {
+		atomic_t val;
+		struct {
+#ifdef __LITTLE_ENDIAN
+			u16	locked_pending;
+			u16	tail;
+#else
+			u16	tail;
+			u16	locked_pending;
+#endif
+		};
+	};
+};
+
+/**
+ * clear_pending_set_locked - take ownership and clear the pending bit.
+ * @lock: Pointer to queued spinlock structure
+ *
+ * *,1,0 -> *,0,1
+ *
+ * Lock stealing is not allowed if this function is used.
+ */
+static __always_inline void clear_pending_set_locked(struct qspinlock *lock)
+{
+	struct __qspinlock *l = (void *)lock;
+
+	WRITE_ONCE(l->locked_pending, _Q_LOCKED_VAL);
+}
+
+/*
+ * xchg_tail - Put in the new queue tail code word & retrieve previous one
+ * @lock : Pointer to queued spinlock structure
+ * @tail : The new queue tail code word
+ * Return: The previous queue tail code word
+ *
+ * xchg(lock, tail)
+ *
+ * p,*,* -> n,*,* ; prev = xchg(lock, node)
+ */
+static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
+{
+	struct __qspinlock *l = (void *)lock;
+
+	return (u32)xchg(&l->tail, tail >> _Q_TAIL_OFFSET) << _Q_TAIL_OFFSET;
+}
+
+#else /* _Q_PENDING_BITS == 8 */
+
 /**
  * clear_pending_set_locked - take ownership and clear the pending bit.
  * @lock: Pointer to queued spinlock structure
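
(Illustrative aside, not part of the patch.) The hunk above relies on the lock word layout used when _Q_PENDING_BITS == 8: bits 0-7 hold the locked byte, bits 8-15 the pending byte, and bits 16-31 the tail, so locked+pending form one naturally aligned 16-bit halfword. Below is a minimal user-space sketch of that overlay, with made-up names (demo_qspinlock, LOCKED_VAL, PENDING_VAL, TAIL_OFFSET) standing in for the kernel's struct __qspinlock and _Q_* constants, a plain store standing in for WRITE_ONCE(), and little-endian byte order assumed:

/* Illustration only -- mirrors the _Q_PENDING_BITS == 8 layout above. */
#include <stdint.h>
#include <stdio.h>

#define LOCKED_VAL	(1U << 0)	/* bit 0 of the locked byte */
#define PENDING_VAL	(1U << 8)	/* bit 0 of the pending byte */
#define TAIL_OFFSET	16

union demo_qspinlock {
	uint32_t val;
	struct {			/* little-endian layout assumed */
		uint16_t locked_pending;
		uint16_t tail;
	};
};

int main(void)
{
	/* pending set, lock free, some CPU encoded in the tail */
	union demo_qspinlock l = { .val = PENDING_VAL | (3U << TAIL_OFFSET) };

	/* one 16-bit store clears pending and sets locked without touching
	 * the tail halfword: *,1,0 -> *,0,1 */
	l.locked_pending = LOCKED_VAL;

	printf("val = 0x%08x\n", (unsigned)l.val);	/* prints 0x00030001 */
	return 0;
}

Because the halfword can be written as a unit, clear_pending_set_locked() becomes a single 16-bit store rather than the cmpxchg loop of the generic version kept under the #else branch, and xchg_tail() can exchange the tail without disturbing the locked/pending bytes.
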
@@ -131,6 +192,7 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail)
 	}
 	return old;
 }
+#endif /* _Q_PENDING_BITS == 8 */
 
 /**
  * queued_spin_lock_slowpath - acquire the queued spinlock
@@ -205,8 +267,13 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
 	 * we're pending, wait for the owner to go away.
 	 *
 	 * *,1,1 -> *,1,0
+	 *
+	 * this wait loop must be a load-acquire such that we match the
+	 * store-release that clears the locked bit and create lock
+	 * sequentiality; this is because not all clear_pending_set_locked()
+	 * implementations imply full barriers.
 	 */
-	while ((val = atomic_read(&lock->val)) & _Q_LOCKED_MASK)
+	while ((val = smp_load_acquire(&lock->val.counter)) & _Q_LOCKED_MASK)
		cpu_relax();
 
 	/*
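
(Illustrative aside, not part of the patch.) The comment added in the hunk above explains why the pending-bit holder's wait loop must be a load-acquire. A C11 sketch of the same pairing follows, with hypothetical names (demo_lock, demo_unlock, demo_wait_for_owner) and C11 atomics standing in for the kernel's smp_load_acquire()/smp_store_release():

/* Sketch only -- C11 stand-ins for the kernel primitives. */
#include <stdatomic.h>
#include <stdint.h>

#define DEMO_LOCKED_MASK	0xffU

static _Atomic uint32_t demo_lock;

static void demo_unlock(void)
{
	/* clear only the locked byte, with release semantics; the kernel
	 * does this with a byte-sized smp_store_release() */
	atomic_fetch_and_explicit(&demo_lock, ~DEMO_LOCKED_MASK,
				  memory_order_release);
}

static uint32_t demo_wait_for_owner(void)
{
	uint32_t val;

	/* the acquire load pairs with the release store in demo_unlock(),
	 * so the previous owner's critical section is ordered before ours */
	while ((val = atomic_load_explicit(&demo_lock, memory_order_acquire)) &
	       DEMO_LOCKED_MASK)
		;	/* cpu_relax() equivalent */

	return val;
}

Without the acquire, a weakly ordered CPU could hoist loads from the new critical section above the spin loop and observe data from before the previous owner's release.
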