@@ -410,6 +410,11 @@ struct npu_context {
 	void *priv;
 };
 
+struct mmio_atsd_reg {
+	struct npu *npu;
+	int reg;
+};
+
 /*
  * Find a free MMIO ATSD register and mark it in use. Return -ENOSPC
  * if none are available.
@@ -419,7 +424,7 @@ static int get_mmio_atsd_reg(struct npu *npu)
 	int i;
 
 	for (i = 0; i < npu->mmio_atsd_count; i++) {
-		if (!test_and_set_bit(i, &npu->mmio_atsd_usage))
+		if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage))
 			return i;
 	}
 
@@ -428,86 +433,90 @@ static int get_mmio_atsd_reg(struct npu *npu)
 
 static void put_mmio_atsd_reg(struct npu *npu, int reg)
 {
-	clear_bit(reg, &npu->mmio_atsd_usage);
+	clear_bit_unlock(reg, &npu->mmio_atsd_usage);
}
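
The switch from plain test_and_set_bit()/clear_bit() to the _lock/_unlock variants above is not cosmetic: test_and_set_bit_lock() has acquire semantics and clear_bit_unlock() has release semantics, so the MMIO accesses performed while the bit is held cannot be reordered outside the hold window. A minimal sketch of the pattern, with a hypothetical driver-local bitmap and pool size:

	static unsigned long usage;	/* one bit per hardware register */

	static int acquire_reg(void)
	{
		int i;

		for (i = 0; i < 8; i++)	/* hypothetical pool size */
			/* acquire: later accesses stay after the set */
			if (!test_and_set_bit_lock(i, &usage))
				return i;
		return -ENOSPC;
	}

	static void release_reg(int i)
	{
		/* release: earlier accesses complete before the clear */
		clear_bit_unlock(i, &usage);
	}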
 
 /* MMIO ATSD register offsets */
 #define XTS_ATSD_AVA  1
 #define XTS_ATSD_STAT 2
 
-static int mmio_launch_invalidate(struct npu *npu, unsigned long launch,
-				unsigned long va)
+static void mmio_launch_invalidate(struct mmio_atsd_reg *mmio_atsd_reg,
+				unsigned long launch, unsigned long va)
 {
-	int mmio_atsd_reg;
-
-	do {
-		mmio_atsd_reg = get_mmio_atsd_reg(npu);
-		cpu_relax();
-	} while (mmio_atsd_reg < 0);
+	struct npu *npu = mmio_atsd_reg->npu;
+	int reg = mmio_atsd_reg->reg;
 
 	__raw_writeq(cpu_to_be64(va),
-		npu->mmio_atsd_regs[mmio_atsd_reg] + XTS_ATSD_AVA);
+		npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA);
 	eieio();
-	__raw_writeq(cpu_to_be64(launch), npu->mmio_atsd_regs[mmio_atsd_reg]);
-
-	return mmio_atsd_reg;
+	__raw_writeq(cpu_to_be64(launch), npu->mmio_atsd_regs[reg]);
 }
 
-static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush)
+static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
+				unsigned long pid, bool flush)
 {
+	int i;
 	unsigned long launch;
 
-	/* IS set to invalidate matching PID */
-	launch = PPC_BIT(12);
+	for (i = 0; i <= max_npu2_index; i++) {
+		if (mmio_atsd_reg[i].reg < 0)
+			continue;
+
+		/* IS set to invalidate matching PID */
+		launch = PPC_BIT(12);
 
-	/* PRS set to process-scoped */
-	launch |= PPC_BIT(13);
+		/* PRS set to process-scoped */
+		launch |= PPC_BIT(13);
 
-	/* AP */
-	launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
+		/* AP */
+		launch |= (u64)
+			mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
 
-	/* PID */
-	launch |= pid << PPC_BITLSHIFT(38);
+		/* PID */
+		launch |= pid << PPC_BITLSHIFT(38);
 
-	/* No flush */
-	launch |= !flush << PPC_BITLSHIFT(39);
+		/* No flush */
+		launch |= !flush << PPC_BITLSHIFT(39);
 
-	/* Invalidating the entire process doesn't use a va */
-	return mmio_launch_invalidate(npu, launch, 0);
+		/* Invalidating the entire process doesn't use a va */
+		mmio_launch_invalidate(&mmio_atsd_reg[i], launch, 0);
+	}
}
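
For readers decoding the launch word: PPC_BIT() and PPC_BITLSHIFT() convert IBM (MSB-0) bit numbering into ordinary left shifts. Their definitions, as found in arch/powerpc/include/asm/bitops.h, plus a worked example of the fields above:

	#define PPC_BITLSHIFT(be)	(BITS_PER_LONG - 1 - (be))
	#define PPC_BIT(bit)		(1UL << PPC_BITLSHIFT(bit))

	/*
	 * On 64-bit, PPC_BIT(12) == 1UL << 51, i.e. IBM bit 12 of the
	 * doubleword. Likewise "pid << PPC_BITLSHIFT(38)" shifts the PID
	 * left by 25, placing its least significant bit at IBM bit 38
	 * with the rest of the field extending towards IBM bit 0.
	 */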
 
-static int mmio_invalidate_va(struct npu *npu, unsigned long va,
-			unsigned long pid, bool flush)
+static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
+			unsigned long va, unsigned long pid, bool flush)
 {
+	int i;
 	unsigned long launch;
 
-	/* IS set to invalidate target VA */
-	launch = 0;
+	for (i = 0; i <= max_npu2_index; i++) {
+		if (mmio_atsd_reg[i].reg < 0)
+			continue;
+
+		/* IS set to invalidate target VA */
+		launch = 0;
 
-	/* PRS set to process scoped */
-	launch |= PPC_BIT(13);
+		/* PRS set to process scoped */
+		launch |= PPC_BIT(13);
 
-	/* AP */
-	launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
+		/* AP */
+		launch |= (u64)
+			mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
 
-	/* PID */
-	launch |= pid << PPC_BITLSHIFT(38);
+		/* PID */
+		launch |= pid << PPC_BITLSHIFT(38);
 
-	/* No flush */
-	launch |= !flush << PPC_BITLSHIFT(39);
+		/* No flush */
+		launch |= !flush << PPC_BITLSHIFT(39);
 
-	return mmio_launch_invalidate(npu, launch, va);
+		mmio_launch_invalidate(&mmio_atsd_reg[i], launch, va);
+	}
 }
 
 #define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
 
-struct mmio_atsd_reg {
-	struct npu *npu;
-	int reg;
-};
-
 static void mmio_invalidate_wait(
-	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush)
+	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
 {
 	struct npu *npu;
 	int i, reg;
@@ -522,16 +531,67 @@ static void mmio_invalidate_wait(
 		reg = mmio_atsd_reg[i].reg;
 		while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT))
 			cpu_relax();
+	}
+}
+
+/*
+ * Acquires all the address translation shootdown (ATSD) registers required to
+ * launch an ATSD on all links this npu_context is active on.
+ */
+static void acquire_atsd_reg(struct npu_context *npu_context,
+			struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
+{
+	int i, j;
+	struct npu *npu;
+	struct pci_dev *npdev;
+	struct pnv_phb *nphb;
 
-		put_mmio_atsd_reg(npu, reg);
+	for (i = 0; i <= max_npu2_index; i++) {
+		mmio_atsd_reg[i].reg = -1;
+		for (j = 0; j < NV_MAX_LINKS; j++) {
+			/*
+			 * There are no ordering requirements with respect to
+			 * the setup of struct npu_context, but to ensure
+			 * consistent behaviour we need to ensure npdev[][] is
+			 * only read once.
+			 */
+			npdev = READ_ONCE(npu_context->npdev[i][j]);
+			if (!npdev)
+				continue;
 
+			nphb = pci_bus_to_host(npdev->bus)->private_data;
+			npu = &nphb->npu;
+			mmio_atsd_reg[i].npu = npu;
+			mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
+			while (mmio_atsd_reg[i].reg < 0) {
+				mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu);
+				cpu_relax();
+			}
+			break;
+		}
+	}
+}
+
+/*
+ * Release previously acquired ATSD registers. To avoid deadlocks the registers
+ * must be released in the same order they were acquired above in
+ * acquire_atsd_reg.
+ */
+static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS])
+{
+	int i;
+
+	for (i = 0; i <= max_npu2_index; i++) {
 		/*
-		 * The GPU requires two flush ATSDs to ensure all entries have
-		 * been flushed. We use PID 0 as it will never be used for a
-		 * process on the GPU.
+		 * We can't rely on npu_context->npdev[][] being the same here
+		 * as when acquire_atsd_reg() was called, hence we use the
+		 * values stored in mmio_atsd_reg during the acquire phase
+		 * rather than re-reading npdev[][].
		 */
-		if (flush)
-			mmio_invalidate_pid(npu, 0, true);
+		if (mmio_atsd_reg[i].reg < 0)
+			continue;
+
+		put_mmio_atsd_reg(mmio_atsd_reg[i].npu, mmio_atsd_reg[i].reg);
 	}
}
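
Taken together, acquire_atsd_reg()/release_atsd_reg() separate register lifetime from ATSD launch: every register needed is pinned, in ascending NPU index order, before any invalidate is fired, held across the extra flush ATSDs, and only then released in the same order. Registers are no longer dropped and re-taken between flushes, so one thread can no longer sit on one NPU's register while spinning for a register another thread holds. The resulting call pattern, condensed from mmio_invalidate() below:

	struct mmio_atsd_reg regs[NV_MAX_NPUS];

	acquire_atsd_reg(npu_context, regs);	/* spin until one reg per active NPU is held */
	mmio_invalidate_pid(regs, pid, flush);	/* launch on the held registers */
	mmio_invalidate_wait(regs);		/* poll XTS_ATSD_STAT until complete */
	release_atsd_reg(regs);			/* clear_bit_unlock() in acquire order */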
 
@@ -542,10 +602,6 @@ static void mmio_invalidate_wait(
 static void mmio_invalidate(struct npu_context *npu_context, int va,
 			unsigned long address, bool flush)
 {
-	int i, j;
-	struct npu *npu;
-	struct pnv_phb *nphb;
-	struct pci_dev *npdev;
 	struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
 	unsigned long pid = npu_context->mm->context.id;
 
@@ -561,37 +617,25 @@ static void mmio_invalidate(struct npu_context *npu_context, int va,
 	 * Loop over all the NPUs this process is active on and launch
 	 * an invalidate.
 	 */
-	for (i = 0; i <= max_npu2_index; i++) {
-		mmio_atsd_reg[i].reg = -1;
-		for (j = 0; j < NV_MAX_LINKS; j++) {
-			npdev = npu_context->npdev[i][j];
-			if (!npdev)
-				continue;
-
-			nphb = pci_bus_to_host(npdev->bus)->private_data;
-			npu = &nphb->npu;
-			mmio_atsd_reg[i].npu = npu;
-
-			if (va)
-				mmio_atsd_reg[i].reg =
-					mmio_invalidate_va(npu, address, pid,
-							flush);
-			else
-				mmio_atsd_reg[i].reg =
-					mmio_invalidate_pid(npu, pid, flush);
-
-			/*
-			 * The NPU hardware forwards the shootdown to all GPUs
-			 * so we only have to launch one shootdown per NPU.
-			 */
-			break;
-		}
+	acquire_atsd_reg(npu_context, mmio_atsd_reg);
+	if (va)
+		mmio_invalidate_va(mmio_atsd_reg, address, pid, flush);
+	else
+		mmio_invalidate_pid(mmio_atsd_reg, pid, flush);
+
+	mmio_invalidate_wait(mmio_atsd_reg);
+	if (flush) {
+		/*
+		 * The GPU requires two flush ATSDs to ensure all entries have
+		 * been flushed. We use PID 0 as it will never be used for a
+		 * process on the GPU.
+		 */
+		mmio_invalidate_pid(mmio_atsd_reg, 0, true);
+		mmio_invalidate_wait(mmio_atsd_reg);
+		mmio_invalidate_pid(mmio_atsd_reg, 0, true);
+		mmio_invalidate_wait(mmio_atsd_reg);
 	}
-
-	mmio_invalidate_wait(mmio_atsd_reg, flush);
-	if (flush)
-		/* Wait for the flush to complete */
-		mmio_invalidate_wait(mmio_atsd_reg, false);
+	release_atsd_reg(mmio_atsd_reg);
 }
 
 static void pnv_npu2_mn_release(struct mmu_notifier *mn,
@@ -726,7 +770,16 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev,
 	if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
 							&nvlink_index)))
 		return ERR_PTR(-ENODEV);
-	npu_context->npdev[npu->index][nvlink_index] = npdev;
+
+	/*
+	 * npdev is a pci_dev pointer set up by the PCI code. We assign it to
+	 * npdev[][] to indicate to the mmu notifiers that an invalidation
+	 * should also be sent over this nvlink. The notifiers don't use any
+	 * other fields in npu_context, so we just need to ensure that when
+	 * they dereference npu_context->npdev[][] it is either a valid
+	 * pointer or NULL.
+	 */
+	WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev);
 
 	if (!nphb->npu.nmmu_flush) {
 		/*
@@ -778,7 +831,7 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context,
 	if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index",
 							&nvlink_index)))
 		return;
-	npu_context->npdev[npu->index][nvlink_index] = NULL;
+	WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL);
 	opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id,
 			PCI_DEVID(gpdev->bus->number, gpdev->devfn));
 	kref_put(&npu_context->kref, pnv_npu2_release_context);
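
A closing note on the WRITE_ONCE()/READ_ONCE() pairing introduced in the last two hunks: it forces the lockless npdev[][] handshake between context setup/teardown and acquire_atsd_reg() to compile to exactly one store and one load, so the notifier side observes either a fully valid pci_dev pointer or NULL, never a torn or re-fetched value. The same publish/consume shape in isolation (slot and the consumer body are hypothetical):

	static struct pci_dev *slot;	/* shared, accessed without a lock */

	static void publish(struct pci_dev *dev)
	{
		WRITE_ONCE(slot, dev);	/* single, non-torn store */
	}

	static void consume(void)
	{
		/* read exactly once; dev stays stable even if slot changes */
		struct pci_dev *dev = READ_ONCE(slot);

		if (dev)
			dev_info(&dev->dev, "link active\n");
	}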