@@ -53,6 +53,11 @@
 #include "sdma.h"
 #include "trace.h"
 
+struct hfi1_affinity_node_list node_affinity = {
+	.list = LIST_HEAD_INIT(node_affinity.list),
+	.lock = __SPIN_LOCK_UNLOCKED(&node_affinity.lock),
+};
+
 /* Name of IRQ types, indexed by enum irq_type */
 static const char * const irq_type_names[] = {
 	"SDMA",
@@ -69,45 +74,100 @@ static inline void init_cpu_mask_set(struct cpu_mask_set *set)
 }
 
 /* Initialize non-HT cpu cores mask */
-int init_real_cpu_mask(struct hfi1_devdata *dd)
+void init_real_cpu_mask(void)
 {
-	struct hfi1_affinity *info;
 	int possible, curr_cpu, i, ht;
 
-	info = kzalloc(sizeof(*info), GFP_KERNEL);
-	if (!info)
-		return -ENOMEM;
-
-	cpumask_clear(&info->real_cpu_mask);
+	cpumask_clear(&node_affinity.real_cpu_mask);
 
 	/* Start with cpu online mask as the real cpu mask */
-	cpumask_copy(&info->real_cpu_mask, cpu_online_mask);
+	cpumask_copy(&node_affinity.real_cpu_mask, cpu_online_mask);
 
 	/*
 	 * Remove HT cores from the real cpu mask. Do this in two steps below.
 	 */
-	possible = cpumask_weight(&info->real_cpu_mask);
+	possible = cpumask_weight(&node_affinity.real_cpu_mask);
 	ht = cpumask_weight(topology_sibling_cpumask(
-					cpumask_first(&info->real_cpu_mask)));
+				cpumask_first(&node_affinity.real_cpu_mask)));
 	/*
 	 * Step 1. Skip over the first N HT siblings and use them as the
 	 * "real" cores. Assumes that HT cores are not enumerated in
 	 * succession (except in the single core case).
 	 */
-	curr_cpu = cpumask_first(&info->real_cpu_mask);
+	curr_cpu = cpumask_first(&node_affinity.real_cpu_mask);
 	for (i = 0; i < possible / ht; i++)
-		curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask);
+		curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
 	/*
 	 * Step 2. Remove the remaining HT siblings. Use cpumask_next() to
 	 * skip any gaps.
 	 */
 	for (; i < possible; i++) {
-		cpumask_clear_cpu(curr_cpu, &info->real_cpu_mask);
-		curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask);
+		cpumask_clear_cpu(curr_cpu, &node_affinity.real_cpu_mask);
+		curr_cpu = cpumask_next(curr_cpu, &node_affinity.real_cpu_mask);
 	}
+}
 
-	dd->affinity = info;
-	return 0;
+void node_affinity_init(void)
+{
+	cpumask_copy(&node_affinity.proc.mask, cpu_online_mask);
+	/*
+	 * The real cpu mask is part of the affinity struct but it has to be
+	 * initialized early. It is needed to calculate the number of user
+	 * contexts in set_up_context_variables().
+	 */
+	init_real_cpu_mask();
+}
+
+void node_affinity_destroy(void)
+{
+	struct list_head *pos, *q;
+	struct hfi1_affinity_node *entry;
+
+	spin_lock(&node_affinity.lock);
+	list_for_each_safe(pos, q, &node_affinity.list) {
+		entry = list_entry(pos, struct hfi1_affinity_node,
+				   list);
+		list_del(pos);
+		kfree(entry);
+	}
+	spin_unlock(&node_affinity.lock);
+}
+
+static struct hfi1_affinity_node *node_affinity_allocate(int node)
+{
+	struct hfi1_affinity_node *entry;
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return NULL;
+	entry->node = node;
+	INIT_LIST_HEAD(&entry->list);
+
+	return entry;
+}
+
+/*
+ * It appends an entry to the list.
+ * It *must* be called with node_affinity.lock held.
+ */
+static void node_affinity_add_tail(struct hfi1_affinity_node *entry)
+{
+	list_add_tail(&entry->list, &node_affinity.list);
+}
+
+/* It must be called with node_affinity.lock held */
+static struct hfi1_affinity_node *node_affinity_lookup(int node)
+{
+	struct list_head *pos;
+	struct hfi1_affinity_node *entry;
+
+	list_for_each(pos, &node_affinity.list) {
+		entry = list_entry(pos, struct hfi1_affinity_node, list);
+		if (entry->node == node)
+			return entry;
+	}
+
+	return NULL;
 }
 
 /*
@@ -121,10 +181,10 @@ int init_real_cpu_mask(struct hfi1_devdata *dd)
  * to the node relative 1 as necessary.
  *
  */
-void hfi1_dev_affinity_init(struct hfi1_devdata *dd)
+int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
 {
 	int node = pcibus_to_node(dd->pcidev->bus);
-	struct hfi1_affinity *info = dd->affinity;
+	struct hfi1_affinity_node *entry;
 	const struct cpumask *local_mask;
 	int curr_cpu, possible, i;
 
@@ -132,55 +192,75 @@ void hfi1_dev_affinity_init(struct hfi1_devdata *dd)
 		node = numa_node_id();
 	dd->node = node;
 
-	spin_lock_init(&info->lock);
-
-	init_cpu_mask_set(&info->def_intr);
-	init_cpu_mask_set(&info->rcv_intr);
-	init_cpu_mask_set(&info->proc);
-
 	local_mask = cpumask_of_node(dd->node);
 	if (cpumask_first(local_mask) >= nr_cpu_ids)
 		local_mask = topology_core_cpumask(0);
-	/* Use the "real" cpu mask of this node as the default */
-	cpumask_and(&info->def_intr.mask, &info->real_cpu_mask, local_mask);
-
-	/* fill in the receive list */
-	possible = cpumask_weight(&info->def_intr.mask);
-	curr_cpu = cpumask_first(&info->def_intr.mask);
-	if (possible == 1) {
-		/* only one CPU, everyone will use it */
-		cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask);
-	} else {
-		/*
-		 * Retain the first CPU in the default list for the control
-		 * context.
-		 */
-		curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
-		/*
-		 * Remove the remaining kernel receive queues from
-		 * the default list and add them to the receive list.
-		 */
-		for (i = 0; i < dd->n_krcv_queues - 1; i++) {
-			cpumask_clear_cpu(curr_cpu, &info->def_intr.mask);
-			cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask);
-			curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask);
-			if (curr_cpu >= nr_cpu_ids)
-				break;
+
+	spin_lock(&node_affinity.lock);
+	entry = node_affinity_lookup(dd->node);
+	spin_unlock(&node_affinity.lock);
+
+	/*
+	 * If this is the first time this NUMA node's affinity is used,
+	 * create an entry in the global affinity structure and initialize it.
+	 */
+	if (!entry) {
+		entry = node_affinity_allocate(node);
+		if (!entry) {
+			dd_dev_err(dd,
+				   "Unable to allocate global affinity node\n");
+			return -ENOMEM;
 		}
-	}
+		init_cpu_mask_set(&entry->def_intr);
+		init_cpu_mask_set(&entry->rcv_intr);
+		/* Use the "real" cpu mask of this node as the default */
+		cpumask_and(&entry->def_intr.mask, &node_affinity.real_cpu_mask,
+			    local_mask);
+
+		/* fill in the receive list */
+		possible = cpumask_weight(&entry->def_intr.mask);
+		curr_cpu = cpumask_first(&entry->def_intr.mask);
+
+		if (possible == 1) {
+			/* only one CPU, everyone will use it */
+			cpumask_set_cpu(curr_cpu, &entry->rcv_intr.mask);
+		} else {
+			/*
+			 * Retain the first CPU in the default list for the
+			 * control context.
+			 */
+			curr_cpu = cpumask_next(curr_cpu,
						&entry->def_intr.mask);
 
-	cpumask_copy(&info->proc.mask, cpu_online_mask);
-}
+			/*
+			 * Remove the remaining kernel receive queues from
+			 * the default list and add them to the receive list.
+			 */
+			for (i = 0; i < dd->n_krcv_queues - 1; i++) {
+				cpumask_clear_cpu(curr_cpu,
+						  &entry->def_intr.mask);
+				cpumask_set_cpu(curr_cpu,
+						&entry->rcv_intr.mask);
+				curr_cpu = cpumask_next(curr_cpu,
+							&entry->def_intr.mask);
+				if (curr_cpu >= nr_cpu_ids)
+					break;
+			}
+		}
 
-void hfi1_dev_affinity_free(struct hfi1_devdata *dd)
-{
-	kfree(dd->affinity);
+		spin_lock(&node_affinity.lock);
+		node_affinity_add_tail(entry);
+		spin_unlock(&node_affinity.lock);
+	}
+
+	return 0;
 }
 
 int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
 {
 	int ret;
 	cpumask_var_t diff;
+	struct hfi1_affinity_node *entry;
 	struct cpu_mask_set *set;
 	struct sdma_engine *sde = NULL;
 	struct hfi1_ctxtdata *rcd = NULL;
@@ -194,21 +274,25 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
 	if (!ret)
 		return -ENOMEM;
 
+	spin_lock(&node_affinity.lock);
+	entry = node_affinity_lookup(dd->node);
+	spin_unlock(&node_affinity.lock);
+
 	switch (msix->type) {
 	case IRQ_SDMA:
 		sde = (struct sdma_engine *)msix->arg;
 		scnprintf(extra, 64, "engine %u", sde->this_idx);
 		/* fall through */
 	case IRQ_GENERAL:
-		set = &dd->affinity->def_intr;
+		set = &entry->def_intr;
 		break;
 	case IRQ_RCVCTXT:
 		rcd = (struct hfi1_ctxtdata *)msix->arg;
 		if (rcd->ctxt == HFI1_CTRL_CTXT) {
-			set = &dd->affinity->def_intr;
+			set = &entry->def_intr;
 			cpu = cpumask_first(&set->mask);
 		} else {
-			set = &dd->affinity->rcv_intr;
+			set = &entry->rcv_intr;
 		}
 		scnprintf(extra, 64, "ctxt %u", rcd->ctxt);
 		break;
@@ -222,8 +306,8 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
 	 * is set above. Skip accounting for it. Everything else finds its
 	 * CPU here.
 	 */
-	if (cpu == -1) {
-		spin_lock(&dd->affinity->lock);
+	if (cpu == -1 && set) {
+		spin_lock(&node_affinity.lock);
 		if (cpumask_equal(&set->mask, &set->used)) {
 			/*
 			 * We've used up all the CPUs, bump up the generation
@@ -235,7 +319,7 @@ int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix)
 		cpumask_andnot(diff, &set->mask, &set->used);
 		cpu = cpumask_first(diff);
 		cpumask_set_cpu(cpu, &set->used);
-		spin_unlock(&dd->affinity->lock);
+		spin_unlock(&node_affinity.lock);
 	}
 
 	switch (msix->type) {
@@ -263,30 +347,35 @@ void hfi1_put_irq_affinity(struct hfi1_devdata *dd,
 {
 	struct cpu_mask_set *set = NULL;
 	struct hfi1_ctxtdata *rcd;
+	struct hfi1_affinity_node *entry;
+
+	spin_lock(&node_affinity.lock);
+	entry = node_affinity_lookup(dd->node);
+	spin_unlock(&node_affinity.lock);
 
 	switch (msix->type) {
 	case IRQ_SDMA:
 	case IRQ_GENERAL:
-		set = &dd->affinity->def_intr;
+		set = &entry->def_intr;
 		break;
 	case IRQ_RCVCTXT:
 		rcd = (struct hfi1_ctxtdata *)msix->arg;
 		/* only do accounting for non control contexts */
 		if (rcd->ctxt != HFI1_CTRL_CTXT)
-			set = &dd->affinity->rcv_intr;
+			set = &entry->rcv_intr;
 		break;
 	default:
 		return;
 	}
 
 	if (set) {
-		spin_lock(&dd->affinity->lock);
+		spin_lock(&node_affinity.lock);
 		cpumask_andnot(&set->used, &set->used, &msix->mask);
 		if (cpumask_empty(&set->used) && set->gen) {
 			set->gen--;
 			cpumask_copy(&set->used, &set->mask);
 		}
-		spin_unlock(&dd->affinity->lock);
+		spin_unlock(&node_affinity.lock);
 	}
 
 	irq_set_affinity_hint(msix->msix.vector, NULL);
@@ -297,9 +386,10 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
 {
 	int cpu = -1, ret;
 	cpumask_var_t diff, mask, intrs;
+	struct hfi1_affinity_node *entry;
 	const struct cpumask *node_mask,
 		*proc_mask = tsk_cpus_allowed(current);
-	struct cpu_mask_set *set = &dd->affinity->proc;
+	struct cpu_mask_set *set = &node_affinity.proc;
 
 	/*
 	 * check whether process/context affinity has already
@@ -338,7 +428,7 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
 	if (!ret)
 		goto free_mask;
 
-	spin_lock(&dd->affinity->lock);
+	spin_lock(&node_affinity.lock);
 	/*
 	 * If we've used all available CPUs, clear the mask and start
 	 * overloading.
@@ -348,13 +438,14 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
 		cpumask_clear(&set->used);
 	}
 
+	entry = node_affinity_lookup(dd->node);
 	/* CPUs used by interrupt handlers */
-	cpumask_copy(intrs, (dd->affinity->def_intr.gen ?
-			     &dd->affinity->def_intr.mask :
-			     &dd->affinity->def_intr.used));
-	cpumask_or(intrs, intrs, (dd->affinity->rcv_intr.gen ?
-				  &dd->affinity->rcv_intr.mask :
-				  &dd->affinity->rcv_intr.used));
+	cpumask_copy(intrs, (entry->def_intr.gen ?
+			     &entry->def_intr.mask :
+			     &entry->def_intr.used));
+	cpumask_or(intrs, intrs, (entry->rcv_intr.gen ?
+				  &entry->rcv_intr.mask :
+				  &entry->rcv_intr.used));
 	hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl",
 		  cpumask_pr_args(intrs));
 
@@ -400,7 +491,7 @@ int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node)
 		cpu = -1;
 	else
 		cpumask_set_cpu(cpu, &set->used);
-	spin_unlock(&dd->affinity->lock);
+	spin_unlock(&node_affinity.lock);
 
 	free_cpumask_var(intrs);
 free_mask:
@@ -413,16 +504,16 @@ done:
 
 void hfi1_put_proc_affinity(struct hfi1_devdata *dd, int cpu)
 {
-	struct cpu_mask_set *set = &dd->affinity->proc;
+	struct cpu_mask_set *set = &node_affinity.proc;
 
 	if (cpu < 0)
 		return;
-	spin_lock(&dd->affinity->lock);
+	spin_lock(&node_affinity.lock);
 	cpumask_clear_cpu(cpu, &set->used);
 	if (cpumask_empty(&set->used) && set->gen) {
 		set->gen--;
 		cpumask_copy(&set->used, &set->mask);
 	}
-	spin_unlock(&dd->affinity->lock);
+	spin_unlock(&node_affinity.lock);
 }
 