|
@@ -43,6 +43,7 @@
|
|
#include <asm/desc.h>
|
|
#include <asm/desc.h>
|
|
#include <asm/debugreg.h>
|
|
#include <asm/debugreg.h>
|
|
#include <asm/kvm_para.h>
|
|
#include <asm/kvm_para.h>
|
|
|
|
+#include <asm/irq_remapping.h>
|
|
|
|
|
|
#include <asm/virtext.h>
|
|
#include <asm/virtext.h>
|
|
#include "trace.h"
|
|
#include "trace.h"
|
|
@@ -200,6 +201,23 @@ struct vcpu_svm {
|
|
struct page *avic_backing_page;
|
|
struct page *avic_backing_page;
|
|
u64 *avic_physical_id_cache;
|
|
u64 *avic_physical_id_cache;
|
|
bool avic_is_running;
|
|
bool avic_is_running;
|
|
|
|
+
|
|
|
|
+ /*
|
|
|
|
+ * Per-vcpu list of struct amd_svm_iommu_ir:
|
|
|
|
+ * This is used mainly to store interrupt remapping information used
|
|
|
|
+ * when updating the vcpu affinity. This avoids the need to scan for
|
|
|
|
+ * IRTE and try to match ga_tag in the IOMMU driver.
|
|
|
|
+ */
|
|
|
|
+ struct list_head ir_list;
|
|
|
|
+ spinlock_t ir_list_lock;
|
|
|
|
+};
|
|
|
|
+
|
|
|
|
+/*
|
|
|
|
+ * This is a wrapper of struct amd_iommu_ir_data.
|
|
|
|
+ */
|
|
|
|
+struct amd_svm_iommu_ir {
|
|
|
|
+ struct list_head node; /* Used by SVM for per-vcpu ir_list */
|
|
|
|
+ void *data; /* Storing pointer to struct amd_ir_data */
|
|
};
|
|
};
|
|
|
|
|
|
#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
|
|
#define AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK (0xFF)
|
|
@@ -1440,31 +1458,34 @@ free_avic:
|
|
return err;
|
|
return err;
|
|
}
|
|
}
|
|
|
|
|
|
-/**
|
|
|
|
- * This function is called during VCPU halt/unhalt.
|
|
|
|
- */
|
|
|
|
-static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
|
|
|
|
|
|
+static inline int
|
|
|
|
+avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r)
|
|
{
|
|
{
|
|
- u64 entry;
|
|
|
|
- int h_physical_id = kvm_cpu_get_apicid(vcpu->cpu);
|
|
|
|
|
|
+ int ret = 0;
|
|
|
|
+ unsigned long flags;
|
|
|
|
+ struct amd_svm_iommu_ir *ir;
|
|
struct vcpu_svm *svm = to_svm(vcpu);
|
|
struct vcpu_svm *svm = to_svm(vcpu);
|
|
|
|
|
|
- if (!kvm_vcpu_apicv_active(vcpu))
|
|
|
|
- return;
|
|
|
|
-
|
|
|
|
- svm->avic_is_running = is_run;
|
|
|
|
|
|
+ if (!kvm_arch_has_assigned_device(vcpu->kvm))
|
|
|
|
+ return 0;
|
|
|
|
|
|
- /* ID = 0xff (broadcast), ID > 0xff (reserved) */
|
|
|
|
- if (WARN_ON(h_physical_id >= AVIC_MAX_PHYSICAL_ID_COUNT))
|
|
|
|
- return;
|
|
|
|
|
|
+ /*
|
|
|
|
+ * Here, we go through the per-vcpu ir_list to update all existing
|
|
|
|
+ * interrupt remapping table entry targeting this vcpu.
|
|
|
|
+ */
|
|
|
|
+ spin_lock_irqsave(&svm->ir_list_lock, flags);
|
|
|
|
|
|
- entry = READ_ONCE(*(svm->avic_physical_id_cache));
|
|
|
|
- WARN_ON(is_run == !!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK));
|
|
|
|
|
|
+ if (list_empty(&svm->ir_list))
|
|
|
|
+ goto out;
|
|
|
|
|
|
- entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
|
|
|
|
- if (is_run)
|
|
|
|
- entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
|
|
|
|
- WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
|
|
|
|
|
|
+ list_for_each_entry(ir, &svm->ir_list, node) {
|
|
|
|
+ ret = amd_iommu_update_ga(cpu, r, ir->data);
|
|
|
|
+ if (ret)
|
|
|
|
+ break;
|
|
|
|
+ }
|
|
|
|
+out:
|
|
|
|
+ spin_unlock_irqrestore(&svm->ir_list_lock, flags);
|
|
|
|
+ return ret;
|
|
}
|
|
}
|
|
|
|
|
|
static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
|
static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
|
@@ -1491,6 +1512,8 @@ static void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
|
|
entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
|
|
entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
|
|
|
|
|
|
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
|
|
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
|
|
|
|
+ avic_update_iommu_vcpu_affinity(vcpu, h_physical_id,
|
|
|
|
+ svm->avic_is_running);
|
|
}
|
|
}
|
|
|
|
|
|
static void avic_vcpu_put(struct kvm_vcpu *vcpu)
|
|
static void avic_vcpu_put(struct kvm_vcpu *vcpu)
|
|
@@ -1502,10 +1525,27 @@ static void avic_vcpu_put(struct kvm_vcpu *vcpu)
|
|
return;
|
|
return;
|
|
|
|
|
|
entry = READ_ONCE(*(svm->avic_physical_id_cache));
|
|
entry = READ_ONCE(*(svm->avic_physical_id_cache));
|
|
|
|
+ if (entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)
|
|
|
|
+ avic_update_iommu_vcpu_affinity(vcpu, -1, 0);
|
|
|
|
+
|
|
entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
|
|
entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK;
|
|
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
|
|
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+/**
|
|
|
|
+ * This function is called during VCPU halt/unhalt.
|
|
|
|
+ */
|
|
|
|
+static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run)
|
|
|
|
+{
|
|
|
|
+ struct vcpu_svm *svm = to_svm(vcpu);
|
|
|
|
+
|
|
|
|
+ svm->avic_is_running = is_run;
|
|
|
|
+ if (is_run)
|
|
|
|
+ avic_vcpu_load(vcpu, vcpu->cpu);
|
|
|
|
+ else
|
|
|
|
+ avic_vcpu_put(vcpu);
|
|
|
|
+}
|
|
|
|
+
|
|
static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
|
static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
|
|
{
|
|
{
|
|
struct vcpu_svm *svm = to_svm(vcpu);
|
|
struct vcpu_svm *svm = to_svm(vcpu);
|
|
@@ -1567,6 +1607,9 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
|
|
err = avic_init_backing_page(&svm->vcpu);
|
|
err = avic_init_backing_page(&svm->vcpu);
|
|
if (err)
|
|
if (err)
|
|
goto free_page4;
|
|
goto free_page4;
|
|
|
|
+
|
|
|
|
+ INIT_LIST_HEAD(&svm->ir_list);
|
|
|
|
+ spin_lock_init(&svm->ir_list_lock);
|
|
}
|
|
}
|
|
|
|
|
|
/* We initialize this flag to true to make sure that the is_running
|
|
/* We initialize this flag to true to make sure that the is_running
|
|
@@ -4363,6 +4406,209 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
|
|
kvm_vcpu_wake_up(vcpu);
|
|
kvm_vcpu_wake_up(vcpu);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
+static void svm_ir_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
|
|
|
|
+{
|
|
|
|
+ unsigned long flags;
|
|
|
|
+ struct amd_svm_iommu_ir *cur;
|
|
|
|
+
|
|
|
|
+ spin_lock_irqsave(&svm->ir_list_lock, flags);
|
|
|
|
+ list_for_each_entry(cur, &svm->ir_list, node) {
|
|
|
|
+ if (cur->data != pi->ir_data)
|
|
|
|
+ continue;
|
|
|
|
+ list_del(&cur->node);
|
|
|
|
+ kfree(cur);
|
|
|
|
+ break;
|
|
|
|
+ }
|
|
|
|
+ spin_unlock_irqrestore(&svm->ir_list_lock, flags);
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+static int svm_ir_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi)
|
|
|
|
+{
|
|
|
|
+ int ret = 0;
|
|
|
|
+ unsigned long flags;
|
|
|
|
+ struct amd_svm_iommu_ir *ir;
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * In some cases, the existing irte is updaed and re-set,
|
|
|
|
+ * so we need to check here if it's already been * added
|
|
|
|
+ * to the ir_list.
|
|
|
|
+ */
|
|
|
|
+ if (pi->ir_data && (pi->prev_ga_tag != 0)) {
|
|
|
|
+ struct kvm *kvm = svm->vcpu.kvm;
|
|
|
|
+ u32 vcpu_id = AVIC_GATAG_TO_VCPUID(pi->prev_ga_tag);
|
|
|
|
+ struct kvm_vcpu *prev_vcpu = kvm_get_vcpu_by_id(kvm, vcpu_id);
|
|
|
|
+ struct vcpu_svm *prev_svm;
|
|
|
|
+
|
|
|
|
+ if (!prev_vcpu) {
|
|
|
|
+ ret = -EINVAL;
|
|
|
|
+ goto out;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ prev_svm = to_svm(prev_vcpu);
|
|
|
|
+ svm_ir_list_del(prev_svm, pi);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Allocating new amd_iommu_pi_data, which will get
|
|
|
|
+ * add to the per-vcpu ir_list.
|
|
|
|
+ */
|
|
|
|
+ ir = kzalloc(sizeof(struct amd_svm_iommu_ir), GFP_KERNEL);
|
|
|
|
+ if (!ir) {
|
|
|
|
+ ret = -ENOMEM;
|
|
|
|
+ goto out;
|
|
|
|
+ }
|
|
|
|
+ ir->data = pi->ir_data;
|
|
|
|
+
|
|
|
|
+ spin_lock_irqsave(&svm->ir_list_lock, flags);
|
|
|
|
+ list_add(&ir->node, &svm->ir_list);
|
|
|
|
+ spin_unlock_irqrestore(&svm->ir_list_lock, flags);
|
|
|
|
+out:
|
|
|
|
+ return ret;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/**
|
|
|
|
+ * Note:
|
|
|
|
+ * The HW cannot support posting multicast/broadcast
|
|
|
|
+ * interrupts to a vCPU. So, we still use legacy interrupt
|
|
|
|
+ * remapping for these kind of interrupts.
|
|
|
|
+ *
|
|
|
|
+ * For lowest-priority interrupts, we only support
|
|
|
|
+ * those with single CPU as the destination, e.g. user
|
|
|
|
+ * configures the interrupts via /proc/irq or uses
|
|
|
|
+ * irqbalance to make the interrupts single-CPU.
|
|
|
|
+ */
|
|
|
|
+static int
|
|
|
|
+get_pi_vcpu_info(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e,
|
|
|
|
+ struct vcpu_data *vcpu_info, struct vcpu_svm **svm)
|
|
|
|
+{
|
|
|
|
+ struct kvm_lapic_irq irq;
|
|
|
|
+ struct kvm_vcpu *vcpu = NULL;
|
|
|
|
+
|
|
|
|
+ kvm_set_msi_irq(kvm, e, &irq);
|
|
|
|
+
|
|
|
|
+ if (!kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) {
|
|
|
|
+ pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n",
|
|
|
|
+ __func__, irq.vector);
|
|
|
|
+ return -1;
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ pr_debug("SVM: %s: use GA mode for irq %u\n", __func__,
|
|
|
|
+ irq.vector);
|
|
|
|
+ *svm = to_svm(vcpu);
|
|
|
|
+ vcpu_info->pi_desc_addr = page_to_phys((*svm)->avic_backing_page);
|
|
|
|
+ vcpu_info->vector = irq.vector;
|
|
|
|
+
|
|
|
|
+ return 0;
|
|
|
|
+}
|
|
|
|
+
|
|
|
|
+/*
|
|
|
|
+ * svm_update_pi_irte - set IRTE for Posted-Interrupts
|
|
|
|
+ *
|
|
|
|
+ * @kvm: kvm
|
|
|
|
+ * @host_irq: host irq of the interrupt
|
|
|
|
+ * @guest_irq: gsi of the interrupt
|
|
|
|
+ * @set: set or unset PI
|
|
|
|
+ * returns 0 on success, < 0 on failure
|
|
|
|
+ */
|
|
|
|
+static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
|
|
|
|
+ uint32_t guest_irq, bool set)
|
|
|
|
+{
|
|
|
|
+ struct kvm_kernel_irq_routing_entry *e;
|
|
|
|
+ struct kvm_irq_routing_table *irq_rt;
|
|
|
|
+ int idx, ret = -EINVAL;
|
|
|
|
+
|
|
|
|
+ if (!kvm_arch_has_assigned_device(kvm) ||
|
|
|
|
+ !irq_remapping_cap(IRQ_POSTING_CAP))
|
|
|
|
+ return 0;
|
|
|
|
+
|
|
|
|
+ pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n",
|
|
|
|
+ __func__, host_irq, guest_irq, set);
|
|
|
|
+
|
|
|
|
+ idx = srcu_read_lock(&kvm->irq_srcu);
|
|
|
|
+ irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
|
|
|
|
+ WARN_ON(guest_irq >= irq_rt->nr_rt_entries);
|
|
|
|
+
|
|
|
|
+ hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
|
|
|
|
+ struct vcpu_data vcpu_info;
|
|
|
|
+ struct vcpu_svm *svm = NULL;
|
|
|
|
+
|
|
|
|
+ if (e->type != KVM_IRQ_ROUTING_MSI)
|
|
|
|
+ continue;
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Here, we setup with legacy mode in the following cases:
|
|
|
|
+ * 1. When cannot target interrupt to a specific vcpu.
|
|
|
|
+ * 2. Unsetting posted interrupt.
|
|
|
|
+ * 3. APIC virtialization is disabled for the vcpu.
|
|
|
|
+ */
|
|
|
|
+ if (!get_pi_vcpu_info(kvm, e, &vcpu_info, &svm) && set &&
|
|
|
|
+ kvm_vcpu_apicv_active(&svm->vcpu)) {
|
|
|
|
+ struct amd_iommu_pi_data pi;
|
|
|
|
+
|
|
|
|
+ /* Try to enable guest_mode in IRTE */
|
|
|
|
+ pi.base = page_to_phys(svm->avic_backing_page) & AVIC_HPA_MASK;
|
|
|
|
+ pi.ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id,
|
|
|
|
+ svm->vcpu.vcpu_id);
|
|
|
|
+ pi.is_guest_mode = true;
|
|
|
|
+ pi.vcpu_data = &vcpu_info;
|
|
|
|
+ ret = irq_set_vcpu_affinity(host_irq, &pi);
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Here, we successfully setting up vcpu affinity in
|
|
|
|
+ * IOMMU guest mode. Now, we need to store the posted
|
|
|
|
+ * interrupt information in a per-vcpu ir_list so that
|
|
|
|
+ * we can reference to them directly when we update vcpu
|
|
|
|
+ * scheduling information in IOMMU irte.
|
|
|
|
+ */
|
|
|
|
+ if (!ret && pi.is_guest_mode)
|
|
|
|
+ svm_ir_list_add(svm, &pi);
|
|
|
|
+ } else {
|
|
|
|
+ /* Use legacy mode in IRTE */
|
|
|
|
+ struct amd_iommu_pi_data pi;
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Here, pi is used to:
|
|
|
|
+ * - Tell IOMMU to use legacy mode for this interrupt.
|
|
|
|
+ * - Retrieve ga_tag of prior interrupt remapping data.
|
|
|
|
+ */
|
|
|
|
+ pi.is_guest_mode = false;
|
|
|
|
+ ret = irq_set_vcpu_affinity(host_irq, &pi);
|
|
|
|
+
|
|
|
|
+ /**
|
|
|
|
+ * Check if the posted interrupt was previously
|
|
|
|
+ * setup with the guest_mode by checking if the ga_tag
|
|
|
|
+ * was cached. If so, we need to clean up the per-vcpu
|
|
|
|
+ * ir_list.
|
|
|
|
+ */
|
|
|
|
+ if (!ret && pi.prev_ga_tag) {
|
|
|
|
+ int id = AVIC_GATAG_TO_VCPUID(pi.prev_ga_tag);
|
|
|
|
+ struct kvm_vcpu *vcpu;
|
|
|
|
+
|
|
|
|
+ vcpu = kvm_get_vcpu_by_id(kvm, id);
|
|
|
|
+ if (vcpu)
|
|
|
|
+ svm_ir_list_del(to_svm(vcpu), &pi);
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if (!ret && svm) {
|
|
|
|
+ trace_kvm_pi_irte_update(svm->vcpu.vcpu_id,
|
|
|
|
+ host_irq, e->gsi,
|
|
|
|
+ vcpu_info.vector,
|
|
|
|
+ vcpu_info.pi_desc_addr, set);
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if (ret < 0) {
|
|
|
|
+ pr_err("%s: failed to update PI IRTE\n", __func__);
|
|
|
|
+ goto out;
|
|
|
|
+ }
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ ret = 0;
|
|
|
|
+out:
|
|
|
|
+ srcu_read_unlock(&kvm->irq_srcu, idx);
|
|
|
|
+ return ret;
|
|
|
|
+}
|
|
|
|
+
|
|
static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
|
|
static int svm_nmi_allowed(struct kvm_vcpu *vcpu)
|
|
{
|
|
{
|
|
struct vcpu_svm *svm = to_svm(vcpu);
|
|
struct vcpu_svm *svm = to_svm(vcpu);
|
|
@@ -5195,6 +5441,7 @@ static struct kvm_x86_ops svm_x86_ops = {
|
|
|
|
|
|
.pmu_ops = &amd_pmu_ops,
|
|
.pmu_ops = &amd_pmu_ops,
|
|
.deliver_posted_interrupt = svm_deliver_avic_intr,
|
|
.deliver_posted_interrupt = svm_deliver_avic_intr,
|
|
|
|
+ .update_pi_irte = svm_update_pi_irte,
|
|
};
|
|
};
|
|
|
|
|
|
static int __init svm_init(void)
|
|
static int __init svm_init(void)
|