|
@@ -878,6 +878,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
|
|
|
static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu);
|
|
|
static DEFINE_PER_CPU(struct desc_ptr, host_gdt);
|
|
|
|
|
|
+/*
|
|
|
+ * We maintain a per-CPU linked list of blocked vCPUs, so that
|
|
|
+ * wakeup_handler() can find which vCPUs should be woken up.
|
|
|
+ * Once initialized, a CPU's list is walked and modified while holding
|
|
|
+ * that CPU's blocked_vcpu_on_cpu_lock.
|
|
|
+ */
|
|
|
+static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu);
|
|
|
+static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock);
|
|
|
+
|
|
|
static unsigned long *vmx_io_bitmap_a;
|
|
|
static unsigned long *vmx_io_bitmap_b;
|
|
|
static unsigned long *vmx_msr_bitmap_legacy;
|
|
@@ -2986,6 +2993,8 @@ static int hardware_enable(void)
|
|
|
return -EBUSY;
|
|
|
|
|
|
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
|
|
|
+ INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu));
|
|
|
+ spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
|
|
|
|
|
|
/*
|
|
|
* Now we can enable the vmclear operation in kdump
|
|
@@ -6045,6 +6054,25 @@ static void update_ple_window_actual_max(void)
|
|
|
ple_window_grow, INT_MIN);
|
|
|
}
|
|
|
|
|
|
+/*
|
|
|
+ * Handler for POSTED_INTR_WAKEUP_VECTOR.
|
|
|
+ *
|
|
|
+ * Registered via kvm_set_posted_intr_wakeup_handler() in hardware_setup().
|
|
|
+ * Walks the blocked-vCPU list of the CPU it runs on, under that CPU's
|
|
|
+ * blocked_vcpu_on_cpu_lock, and kicks every vCPU whose posted-interrupt
|
|
|
+ * descriptor has 'ON' set (pi_test_on() returns 1).
|
|
|
+ *
|
|
|
+ * NOTE(review): this takes the lock with plain spin_lock() while the
|
|
|
+ * other users use spin_lock_irqsave(); presumably this is fine because
|
|
|
+ * the handler already runs with interrupts disabled -- confirm.
|
|
|
+ */
|
|
|
+static void wakeup_handler(void)
|
|
|
+{
|
|
|
+ struct kvm_vcpu *vcpu;
|
|
|
+ int cpu = smp_processor_id(); /* selects this CPU's list and lock */
|
|
|
+
|
|
|
+ spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
|
|
|
+ list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu),
|
|
|
+ blocked_vcpu_list) {
|
|
|
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
|
|
|
+
|
|
|
+ if (pi_test_on(pi_desc) == 1) /* only vCPUs with 'ON' set */
|
|
|
+ kvm_vcpu_kick(vcpu);
|
|
|
+ }
|
|
|
+ spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu));
|
|
|
+}
|
|
|
+
|
|
|
static __init int hardware_setup(void)
|
|
|
{
|
|
|
int r = -ENOMEM, i, msr;
|
|
@@ -6231,6 +6259,8 @@ static __init int hardware_setup(void)
|
|
|
kvm_x86_ops->enable_log_dirty_pt_masked = NULL;
|
|
|
}
|
|
|
|
|
|
+ kvm_set_posted_intr_wakeup_handler(wakeup_handler);
|
|
|
+
|
|
|
return alloc_kvm_area();
|
|
|
|
|
|
out8:
|
|
@@ -10431,6 +10461,126 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
|
|
|
kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
|
|
|
}
|
|
|
|
|
|
+/*
|
|
|
+ * This routine does the following for a vCPU that is about to be
|
|
|
+ * blocked, if VT-d PI is enabled:
|
|
|
+ * - Add the vCPU to the per-CPU wakeup list, so that when interrupts
|
|
|
+ *   happen we can find the right vCPU to wake up.
|
|
|
+ * - Change the posted-interrupt descriptor as below:
|
|
|
+ *      'NDST' <-- physical ID of vcpu->pre_pcpu
|
|
|
+ *      'NV'   <-- POSTED_INTR_WAKEUP_VECTOR
|
|
|
+ * - If 'ON' is set during this process, at least one interrupt has
|
|
|
+ *   been posted for this vCPU and we cannot block it. In that case
|
|
|
+ *   the vCPU is taken off the list again and 1 is returned;
|
|
|
+ *   otherwise 0 is returned.
|
|
|
+ *
|
|
|
+ * Returns 0 without touching anything when the VM has no assigned
|
|
|
+ * device or irq_remapping_cap(IRQ_POSTING_CAP) is false.
|
|
|
+ */
|
|
|
+static int vmx_pre_block(struct kvm_vcpu *vcpu)
|
|
|
+{
|
|
|
+ unsigned long flags;
|
|
|
+ unsigned int dest;
|
|
|
+ struct pi_desc old, new;
|
|
|
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
|
|
|
+
|
|
|
+ if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
|
|
|
+ !irq_remapping_cap(IRQ_POSTING_CAP))
|
|
|
+ return 0;
|
|
|
+
|
|
|
+ vcpu->pre_pcpu = vcpu->cpu; /* remember whose list and lock we use */
|
|
|
+ spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
|
|
|
+ vcpu->pre_pcpu), flags);
|
|
|
+ list_add_tail(&vcpu->blocked_vcpu_list,
|
|
|
+ &per_cpu(blocked_vcpu_on_cpu,
|
|
|
+ vcpu->pre_pcpu));
|
|
|
+ spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
|
|
|
+ vcpu->pre_pcpu), flags);
|
|
|
+
|
|
|
+ do {
|
|
|
+ old.control = new.control = pi_desc->control; /* snapshot for cmpxchg */
|
|
|
+
|
|
|
+ /*
|
|
|
+ * We should not block the vCPU if
|
|
|
+ * an interrupt is posted for it.
|
|
|
+ */
|
|
|
+ if (pi_test_on(pi_desc) == 1) {
|
|
|
+ spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
|
|
|
+ vcpu->pre_pcpu), flags);
|
|
|
+ list_del(&vcpu->blocked_vcpu_list);
|
|
|
+ spin_unlock_irqrestore(
|
|
|
+ &per_cpu(blocked_vcpu_on_cpu_lock,
|
|
|
+ vcpu->pre_pcpu), flags);
|
|
|
+ vcpu->pre_pcpu = -1; /* -1: not on any blocked list */
|
|
|
+
|
|
|
+ return 1;
|
|
|
+ }
|
|
|
+
|
|
|
+ WARN((pi_desc->sn == 1),
|
|
|
+ "Warning: SN field of posted-interrupts "
|
|
|
+ "is set before blocking\n");
|
|
|
+
|
|
|
+ /*
|
|
|
+ * Since the vCPU can be preempted during this process,
|
|
|
+ * vcpu->cpu could differ from pre_pcpu. We need to set
|
|
|
+ * pre_pcpu as the destination of the wakeup notification
|
|
|
+ * event, so that the wakeup handler can find the right
|
|
|
+ * vCPU to wake up if interrupts happen while the vCPU
|
|
|
+ * is in the blocked state.
|
|
|
+ */
|
|
|
+ dest = cpu_physical_id(vcpu->pre_pcpu);
|
|
|
+
|
|
|
+ if (x2apic_enabled())
|
|
|
+ new.ndst = dest;
|
|
|
+ else
|
|
|
+ new.ndst = (dest << 8) & 0xFF00; /* non-x2APIC: dest goes in bits 15:8 */
|
|
|
+
|
|
|
+ /* set 'NV' to 'wakeup vector' */
|
|
|
+ new.nv = POSTED_INTR_WAKEUP_VECTOR;
|
|
|
+ } while (cmpxchg(&pi_desc->control, old.control,
|
|
|
+ new.control) != old.control); /* retry if control changed meanwhile */
|
|
|
+
|
|
|
+ return 0;
|
|
|
+}
|
|
|
+/*
|
|
|
+ * Counterpart of vmx_pre_block(), wired up as the .post_block op.
|
|
|
+ * - Change the posted-interrupt descriptor as below:
|
|
|
+ *      'NDST' <-- physical ID of vcpu->cpu
|
|
|
+ *      'SN'   <-- 0
|
|
|
+ *      'NV'   <-- POSTED_INTR_VECTOR
|
|
|
+ * - If vcpu->pre_pcpu is not -1, remove the vCPU from that CPU's
|
|
|
+ *   blocked list under its lock and reset pre_pcpu to -1.
|
|
|
+ *
|
|
|
+ * Does nothing when the VM has no assigned device or
|
|
|
+ * irq_remapping_cap(IRQ_POSTING_CAP) is false.
|
|
|
+ *
|
|
|
+ * NOTE(review): that guard is evaluated independently of the one in
|
|
|
+ * vmx_pre_block(). If its result can change while the vCPU is blocked
|
|
|
+ * (e.g. the last assigned device goes away), the vCPU would stay on
|
|
|
+ * the blocked list -- confirm whether this can happen.
|
|
|
+ */
|
|
|
+static void vmx_post_block(struct kvm_vcpu *vcpu)
|
|
|
+{
|
|
|
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
|
|
|
+ struct pi_desc old, new;
|
|
|
+ unsigned int dest;
|
|
|
+ unsigned long flags;
|
|
|
+
|
|
|
+ if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
|
|
|
+ !irq_remapping_cap(IRQ_POSTING_CAP))
|
|
|
+ return;
|
|
|
+
|
|
|
+ do {
|
|
|
+ old.control = new.control = pi_desc->control; /* snapshot for cmpxchg */
|
|
|
+
|
|
|
+ dest = cpu_physical_id(vcpu->cpu); /* current CPU, not pre_pcpu */
|
|
|
+
|
|
|
+ if (x2apic_enabled())
|
|
|
+ new.ndst = dest;
|
|
|
+ else
|
|
|
+ new.ndst = (dest << 8) & 0xFF00; /* non-x2APIC: dest goes in bits 15:8 */
|
|
|
+
|
|
|
+ /* Allow posting non-urgent interrupts */
|
|
|
+ new.sn = 0;
|
|
|
+
|
|
|
+ /* set 'NV' to 'notification vector' */
|
|
|
+ new.nv = POSTED_INTR_VECTOR;
|
|
|
+ } while (cmpxchg(&pi_desc->control, old.control,
|
|
|
+ new.control) != old.control); /* retry if control changed meanwhile */
|
|
|
+
|
|
|
+ if(vcpu->pre_pcpu != -1) { /* still queued on pre_pcpu's blocked list */
|
|
|
+ spin_lock_irqsave(
|
|
|
+ &per_cpu(blocked_vcpu_on_cpu_lock,
|
|
|
+ vcpu->pre_pcpu), flags);
|
|
|
+ list_del(&vcpu->blocked_vcpu_list);
|
|
|
+ spin_unlock_irqrestore(
|
|
|
+ &per_cpu(blocked_vcpu_on_cpu_lock,
|
|
|
+ vcpu->pre_pcpu), flags);
|
|
|
+ vcpu->pre_pcpu = -1; /* -1: not on any blocked list */
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
/*
|
|
|
* vmx_update_pi_irte - set IRTE for Posted-Interrupts
|
|
|
*
|
|
@@ -10622,6 +10772,9 @@ static struct kvm_x86_ops vmx_x86_ops = {
|
|
|
.flush_log_dirty = vmx_flush_log_dirty,
|
|
|
.enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
|
|
|
|
|
|
+ .pre_block = vmx_pre_block,
|
|
|
+ .post_block = vmx_post_block,
|
|
|
+
|
|
|
.pmu_ops = &intel_pmu_ops,
|
|
|
|
|
|
.update_pi_irte = vmx_update_pi_irte,
|