|
@@ -74,12 +74,24 @@
|
|
|
*/
|
|
|
|
|
|
#include "mcs_spinlock.h"
|
|
|
+#define MAX_NODES 4 /* number of entries in the per-CPU qnodes[] array */
|
|
|
|
|
|
+/*
|
|
|
+ * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in
|
|
|
+ * size and four of them will fit nicely in one 64-byte cacheline. For
|
|
|
+ * pvqspinlock, however, we need more space for extra data. To accommodate
|
|
|
+ * that, we insert two more long words to pad it up to 32 bytes. IOW, only
|
|
|
+ * two of them can fit in a cacheline in this case. That is OK as it is rare
|
|
|
+ * to have more than 2 levels of slowpath nesting in actual use. We don't
|
|
|
+ * want to penalize pvqspinlocks to optimize for a rare case in native
|
|
|
+ * qspinlocks.
|
|
|
+ */
|
|
|
+struct qnode {
|
|
|
+ struct mcs_spinlock mcs; /* must stay the first member: grab_mcs_node() casts an mcs pointer to struct qnode */
|
|
|
#ifdef CONFIG_PARAVIRT_SPINLOCKS
|
|
|
-#define MAX_NODES 8
|
|
|
-#else
|
|
|
-#define MAX_NODES 4
|
|
|
+ long reserved[2]; /* padding that gives pvqspinlock room for its extra per-node data */
|
|
|
#endif
|
|
|
+};
|
|
|
|
|
|
/*
|
|
|
* The pending bit spinning loop count.
|
|
@@ -101,7 +113,7 @@
|
|
|
*
|
|
|
* PV doubles the storage and uses the second cacheline for PV state.
|
|
|
*/
|
|
|
-static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]);
|
|
|
+static DEFINE_PER_CPU_ALIGNED(struct qnode, qnodes[MAX_NODES]);
|
|
|
|
|
|
/*
|
|
|
* We must be able to distinguish between no-tail and the tail at 0:0,
|
|
@@ -126,7 +138,13 @@ static inline __pure struct mcs_spinlock *decode_tail(u32 tail)
|
|
|
int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1;
|
|
|
int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
|
|
|
|
|
|
- return per_cpu_ptr(&mcs_nodes[idx], cpu);
|
|
|
+ return per_cpu_ptr(&qnodes[idx].mcs, cpu);
|
|
|
+}
|
|
|
+
|
|
|
+static inline __pure
|
|
|
+struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx) /* returns the mcs member of the idx-th qnode counted from base */
|
|
|
+{
|
|
|
+ return &((struct qnode *)base + idx)->mcs; /* step in sizeof(struct qnode) units; the cast relies on mcs being at offset 0 of struct qnode */
|
|
|
}
|
|
|
|
|
|
#define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK)
|
|
@@ -390,11 +408,11 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
|
|
|
queue:
|
|
|
qstat_inc(qstat_lock_slowpath, true);
|
|
|
pv_queue:
|
|
|
- node = this_cpu_ptr(&mcs_nodes[0]);
|
|
|
+ node = this_cpu_ptr(&qnodes[0].mcs);
|
|
|
idx = node->count++;
|
|
|
tail = encode_tail(smp_processor_id(), idx);
|
|
|
|
|
|
- node += idx;
|
|
|
+ node = grab_mcs_node(node, idx);
|
|
|
|
|
|
/*
|
|
|
* Keep counts of non-zero index values:
|
|
@@ -534,7 +552,7 @@ release:
|
|
|
/*
|
|
|
* release the node
|
|
|
*/
|
|
|
- __this_cpu_dec(mcs_nodes[0].count);
|
|
|
+ __this_cpu_dec(qnodes[0].mcs.count);
|
|
|
}
|
|
|
EXPORT_SYMBOL(queued_spin_lock_slowpath);
|
|
|
|