|
@@ -8,8 +8,10 @@
|
|
|
#include <linux/types.h>
|
|
|
#include <linux/export.h>
|
|
|
#include <linux/spinlock.h>
|
|
|
+#include <linux/jiffies.h>
|
|
|
#include <linux/init.h>
|
|
|
#include <linux/smp.h>
|
|
|
+#include <linux/percpu.h>
|
|
|
#include <asm/io.h>
|
|
|
|
|
|
int spin_retry = -1;
|
|
@@ -32,6 +34,40 @@ static int __init spin_retry_setup(char *str)
|
|
|
}
|
|
|
__setup("spin_retry=", spin_retry_setup);
|
|
|
|
|
|
+struct spin_wait { /* One wait-queue node; each CPU owns a small array of these */
|
|
|
+ struct spin_wait *next, *prev; /* prev: node queued ahead of this one, next: node queued behind it; NULL when unlinked */
|
|
|
+ int node_id; /* ((cpu + 1) << _Q_TAIL_CPU_OFFSET) + (array index << _Q_TAIL_IDX_OFFSET), set by arch_spin_lock_setup() */
|
|
|
+} __aligned(32);
|
|
|
+/* Four wait nodes per CPU; arch_spin_lock_queued() picks one via S390_lowcore.spinlock_index */
|
|
|
+static DEFINE_PER_CPU_ALIGNED(struct spin_wait, spin_wait[4]);
|
|
|
+/* Bit offsets of the fields packed into the lock word */
|
|
|
+#define _Q_LOCK_CPU_OFFSET 0 /* owner field: cpu + 1, 0 while the lock is free */
|
|
|
+#define _Q_LOCK_STEAL_OFFSET 16 /* steal counter */
|
|
|
+#define _Q_TAIL_IDX_OFFSET 18 /* index of the tail node within its CPU's spin_wait[] array */
|
|
|
+#define _Q_TAIL_CPU_OFFSET 20 /* cpu + 1 of the tail node, 0 when no node is queued */
|
|
|
+/* Bit masks matching the offsets above */
|
|
|
+#define _Q_LOCK_CPU_MASK 0x0000ffff
|
|
|
+#define _Q_LOCK_STEAL_ADD 0x00010000 /* adding this bumps the steal counter by one */
|
|
|
+#define _Q_LOCK_STEAL_MASK 0x00030000
|
|
|
+#define _Q_TAIL_IDX_MASK 0x000c0000
|
|
|
+#define _Q_TAIL_CPU_MASK 0xfff00000
|
|
|
+/* Combined masks: lock part (owner + steal counter) and tail part (node index + cpu) */
|
|
|
+#define _Q_LOCK_MASK (_Q_LOCK_CPU_MASK | _Q_LOCK_STEAL_MASK)
|
|
|
+#define _Q_TAIL_MASK (_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK)
|
|
|
+/* Initialise the four spin_wait nodes of @cpu: zero each node and precompute its node_id */
|
|
|
+void arch_spin_lock_setup(int cpu)
|
|
|
+{
|
|
|
+ struct spin_wait *node;
|
|
|
+ int ix;
|
|
|
+
|
|
|
+ node = per_cpu_ptr(&spin_wait[0], cpu); /* first of the four nodes of @cpu */
|
|
|
+ for (ix = 0; ix < 4; ix++, node++) {
|
|
|
+ memset(node, 0, sizeof(*node));
|
|
|
+ node->node_id = ((cpu + 1) << _Q_TAIL_CPU_OFFSET) +
|
|
|
+ (ix << _Q_TAIL_IDX_OFFSET); /* cpu + 1 in the tail-cpu field, array index in the tail-idx field */
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
static inline int arch_load_niai4(int *lock)
|
|
|
{
|
|
|
int owner;
|
|
@@ -60,75 +96,160 @@ static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
|
|
|
return expected == old;
|
|
|
}
|
|
|
|
|
|
-void arch_spin_lock_wait(arch_spinlock_t *lp)
|
|
|
+static inline struct spin_wait *arch_spin_decode_tail(int lock)
|
|
|
{
|
|
|
- int cpu = SPINLOCK_LOCKVAL;
|
|
|
- int owner, count;
|
|
|
+ int ix, cpu;
|
|
|
+ /* Split the tail part of @lock into array index and cpu + 1, then look up that per-CPU node */
|
|
|
+ ix = (lock & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
|
|
|
+ cpu = (lock & _Q_TAIL_CPU_MASK) >> _Q_TAIL_CPU_OFFSET;
|
|
|
+ return per_cpu_ptr(&spin_wait[ix], cpu - 1); /* tail stores cpu + 1; the visible caller only passes a non-zero tail */
|
|
|
+}
|
|
|
+/* Return cpu + 1 of the CPU to yield to for lock value @lock as seen from @node, or 0 if there is none */
|
|
|
+static inline int arch_spin_yield_target(int lock, struct spin_wait *node)
|
|
|
+{
|
|
|
+ if (lock & _Q_LOCK_CPU_MASK) /* lock has an owner: the owner is the target */
|
|
|
+ return lock & _Q_LOCK_CPU_MASK;
|
|
|
+ if (node == NULL || node->prev == NULL)
|
|
|
+ return 0; /* 0 -> no target cpu */
|
|
|
+ while (node->prev) /* follow the prev links to the first node in the queue */
|
|
|
+ node = node->prev;
|
|
|
+ return node->node_id >> _Q_TAIL_CPU_OFFSET; /* cpu + 1 of that first node */
|
|
|
+}
|
|
|
+/* Lock slow path that queues this CPU on a list of per-CPU spin_wait nodes until it can take @lp */
|
|
|
+static inline void arch_spin_lock_queued(arch_spinlock_t *lp)
|
|
|
+{
|
|
|
+ struct spin_wait *node, *next;
|
|
|
+ int lockval, ix, node_id, tail_id, old, new, owner, count;
|
|
|
+ /* Claim the next per-CPU wait node; the index is given back at the 'out' label */
|
|
|
+ ix = S390_lowcore.spinlock_index++;
|
|
|
+ barrier();
|
|
|
+ lockval = SPINLOCK_LOCKVAL; /* cpu + 1 */
|
|
|
+ node = this_cpu_ptr(&spin_wait[ix]);
|
|
|
+ node->prev = node->next = NULL;
|
|
|
+ node_id = node->node_id;
|
|
|
+
|
|
|
+ /* Enqueue the node for this CPU in the spinlock wait queue */
|
|
|
+ while (1) {
|
|
|
+ old = READ_ONCE(lp->lock);
|
|
|
+ if ((old & _Q_LOCK_CPU_MASK) == 0 &&
|
|
|
+ (old & _Q_LOCK_STEAL_MASK) != _Q_LOCK_STEAL_MASK) {
|
|
|
+ /*
|
|
|
+ * The lock is free but there may be waiters.
|
|
|
+ * With no waiters simply take the lock, if there
|
|
|
+ * are waiters try to steal the lock. The lock may
|
|
|
+ * be stolen three times before the next queued
|
|
|
+ * waiter will get the lock.
|
|
|
+ */
|
|
|
+ new = (old ? (old + _Q_LOCK_STEAL_ADD) : 0) | lockval; /* non-zero old means a tail and/or steal count is present: bump the steal counter */
|
|
|
+ if (__atomic_cmpxchg_bool(&lp->lock, old, new))
|
|
|
+ /* Got the lock */
|
|
|
+ goto out;
|
|
|
+ /* lock passing in progress */
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ /* Make the node of this CPU the new tail. */
|
|
|
+ new = node_id | (old & _Q_LOCK_MASK);
|
|
|
+ if (__atomic_cmpxchg_bool(&lp->lock, old, new))
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ /* Set the 'next' pointer of the tail node in the queue */
|
|
|
+ tail_id = old & _Q_TAIL_MASK;
|
|
|
+ if (tail_id != 0) {
|
|
|
+ node->prev = arch_spin_decode_tail(tail_id);
|
|
|
+ WRITE_ONCE(node->prev->next, node);
|
|
|
+ }
|
|
|
|
|
|
|
|
|
/* Pass the virtual CPU to the lock holder if it is not running */
|
|
|
- owner = arch_load_niai4(&lp->lock);
|
|
|
+ owner = arch_spin_yield_target(old, node);
|
|
|
if (owner && arch_vcpu_is_preempted(owner - 1))
|
|
|
smp_yield_cpu(owner - 1);
|
|
|
|
|
|
|
|
|
+ /* Spin on the CPU local node->prev pointer */
|
|
|
+ if (tail_id != 0) {
|
|
|
+ count = spin_retry;
|
|
|
+ while (READ_ONCE(node->prev) != NULL) {
|
|
|
+ if (count-- >= 0)
|
|
|
+ continue;
|
|
|
+ count = spin_retry;
|
|
|
+ /* Query running state of lock holder again. */
|
|
|
+ owner = arch_spin_yield_target(old, node); /* NOTE(review): 'old' is still the value sampled at enqueue time, it is not re-read here - confirm intended */
|
|
|
+ if (owner && arch_vcpu_is_preempted(owner - 1))
|
|
|
+ smp_yield_cpu(owner - 1);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ /* Spin on the lock value in the spinlock_t */
|
|
|
count = spin_retry;
|
|
|
while (1) {
|
|
|
- owner = arch_load_niai4(&lp->lock);
|
|
|
- /* Try to get the lock if it is free. */
|
|
|
+ old = READ_ONCE(lp->lock);
|
|
|
+ owner = old & _Q_LOCK_CPU_MASK;
|
|
|
if (!owner) {
|
|
|
- if (arch_cmpxchg_niai8(&lp->lock, 0, cpu))
|
|
|
- return;
|
|
|
+ tail_id = old & _Q_TAIL_MASK;
|
|
|
+ new = ((tail_id != node_id) ? tail_id : 0) | lockval; /* keep the tail unless it is this node; the steal counter is not carried over */
|
|
|
+ if (__atomic_cmpxchg_bool(&lp->lock, old, new))
|
|
|
+ /* Got the lock */
|
|
|
+ break;
|
|
|
continue;
|
|
|
}
|
|
|
if (count-- >= 0)
|
|
|
continue;
|
|
|
count = spin_retry;
|
|
|
- /*
|
|
|
- * For multiple layers of hypervisors, e.g. z/VM + LPAR
|
|
|
- * yield the CPU unconditionally. For LPAR rely on the
|
|
|
- * sense running status.
|
|
|
- */
|
|
|
if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1))
|
|
|
smp_yield_cpu(owner - 1);
|
|
|
}
|
|
|
+
|
|
|
+ /* Pass lock_spin job to next CPU in the queue */
|
|
|
+ if (node_id && tail_id != node_id) {
|
|
|
+ /* Wait until the next CPU has set up the 'next' pointer */
|
|
|
+ while ((next = READ_ONCE(node->next)) == NULL)
|
|
|
+ ;
|
|
|
+ next->prev = NULL; /* ends the successor's spin loop on its node->prev */
|
|
|
+ }
|
|
|
+
|
|
|
+ out:
|
|
|
+ S390_lowcore.spinlock_index--;
|
|
|
}
|
|
|
-EXPORT_SYMBOL(arch_spin_lock_wait);
|
|
|
|
|
|
-void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
|
|
|
+static inline void arch_spin_lock_classic(arch_spinlock_t *lp) /* Lock slow path that spins directly on the lock word without queueing a wait node */
|
|
|
{
|
|
|
- int cpu = SPINLOCK_LOCKVAL;
|
|
|
- int owner, count;
|
|
|
+ int lockval, old, new, owner, count;
|
|
|
|
|
|
|
|
|
- local_irq_restore(flags);
|
|
|
+ lockval = SPINLOCK_LOCKVAL; /* cpu + 1 */
|
|
|
|
|
|
|
|
|
/* Pass the virtual CPU to the lock holder if it is not running */
|
|
|
- owner = arch_load_niai4(&lp->lock);
|
|
|
+ owner = arch_spin_yield_target(READ_ONCE(lp->lock), NULL); /* NULL node: only a current owner can be the yield target */
|
|
|
if (owner && arch_vcpu_is_preempted(owner - 1))
|
|
|
smp_yield_cpu(owner - 1);
|
|
|
|
|
|
|
|
|
count = spin_retry;
|
|
|
while (1) {
|
|
|
- owner = arch_load_niai4(&lp->lock);
|
|
|
+ old = arch_load_niai4(&lp->lock);
|
|
|
+ owner = old & _Q_LOCK_CPU_MASK;
|
|
|
/* Try to get the lock if it is free. */
|
|
|
if (!owner) {
|
|
|
- local_irq_disable();
|
|
|
- if (arch_cmpxchg_niai8(&lp->lock, 0, cpu))
|
|
|
- return;
|
|
|
- local_irq_restore(flags);
|
|
|
+ new = (old & _Q_TAIL_MASK) | lockval; /* keep the tail of any queued waiters; the steal counter is not carried over */
|
|
|
+ if (arch_cmpxchg_niai8(&lp->lock, old, new))
|
|
|
+ /* Got the lock */
|
|
|
+ return;
|
|
|
continue;
|
|
|
}
|
|
|
if (count-- >= 0)
|
|
|
continue;
|
|
|
count = spin_retry;
|
|
|
- /*
|
|
|
- * For multiple layers of hypervisors, e.g. z/VM + LPAR
|
|
|
- * yield the CPU unconditionally. For LPAR rely on the
|
|
|
- * sense running status.
|
|
|
- */
|
|
|
if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1))
|
|
|
smp_yield_cpu(owner - 1);
|
|
|
}
|
|
|
}
|
|
|
-EXPORT_SYMBOL(arch_spin_lock_wait_flags);
|
|
|
+/* Exported lock slow path: queued variant when CIF_DEDICATED_CPU is set, classic variant otherwise */
|
|
|
+void arch_spin_lock_wait(arch_spinlock_t *lp)
|
|
|
+{
|
|
|
+ /* Use classic spinlocks + niai if the steal time is >= 10% */
|
|
|
+ if (test_cpu_flag(CIF_DEDICATED_CPU)) /* NOTE(review): no steal-time test is visible here; presumably the flag is maintained elsewhere - confirm */
|
|
|
+ arch_spin_lock_queued(lp);
|
|
|
+ else
|
|
|
+ arch_spin_lock_classic(lp);
|
|
|
+}
|
|
|
+EXPORT_SYMBOL(arch_spin_lock_wait);
|
|
|
|
|
|
int arch_spin_trylock_retry(arch_spinlock_t *lp)
|
|
|
{
|
|
@@ -270,3 +391,16 @@ void arch_lock_relax(int cpu)
|
|
|
smp_yield_cpu(cpu - 1);
|
|
|
}
|
|
|
EXPORT_SYMBOL(arch_lock_relax);
|
|
|
+/* Yield this CPU to the current owner of @lp, if the lock has an owner and a yield is warranted */
|
|
|
+void arch_spin_relax(arch_spinlock_t *lp)
|
|
|
+{
|
|
|
+ int cpu;
|
|
|
+
|
|
|
+ cpu = READ_ONCE(lp->lock) & _Q_LOCK_CPU_MASK; /* owner field holds cpu + 1 */
|
|
|
+ if (!cpu) /* lock is free: nobody to yield to */
|
|
|
+ return;
|
|
|
+ if (MACHINE_IS_LPAR && !arch_vcpu_is_preempted(cpu - 1)) /* on LPAR yield only if the owner is reported as preempted */
|
|
|
+ return;
|
|
|
+ smp_yield_cpu(cpu - 1);
|
|
|
+}
|
|
|
+EXPORT_SYMBOL(arch_spin_relax);
|