Browse Source

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull more KVM updates from Paolo Bonzini:
 - ARM bugfix and MSI injection support
 - x86 nested virt tweak and OOPS fix
 - Simplify pvclock code (vdso bits acked by Andy Lutomirski).

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  nvmx: mark ept single context invalidation as supported
  nvmx: remove comment about missing nested vpid support
  KVM: lapic: fix access preemption timer stuff even if kernel_irqchip=off
  KVM: documentation: fix KVM_CAP_X2APIC_API information
  x86: vdso: use __pvclock_read_cycles
  pvclock: introduce seqcount-like API
  arm64: KVM: Set cpsr before spsr on fault injection
  KVM: arm: vgic-irqfd: Workaround changing kvm_set_routing_entry prototype
  KVM: arm/arm64: Enable MSI routing
  KVM: arm/arm64: Enable irqchip routing
  KVM: Move kvm_setup_default/empty_irq_routing declaration in arch specific header
  KVM: irqchip: Convey devid to kvm_set_msi
  KVM: Add devid in kvm_kernel_irq_routing_entry
  KVM: api: Pass the devid in the msi routing entry
Linus Torvalds 9 years ago
parent
commit
80fac0f577

+ 37 - 16
Documentation/virtual/kvm/api.txt

@@ -1433,13 +1433,16 @@ KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed.
 4.52 KVM_SET_GSI_ROUTING
 4.52 KVM_SET_GSI_ROUTING
 
 
 Capability: KVM_CAP_IRQ_ROUTING
 Capability: KVM_CAP_IRQ_ROUTING
-Architectures: x86 s390
+Architectures: x86 s390 arm arm64
 Type: vm ioctl
 Type: vm ioctl
 Parameters: struct kvm_irq_routing (in)
 Parameters: struct kvm_irq_routing (in)
 Returns: 0 on success, -1 on error
 Returns: 0 on success, -1 on error
 
 
 Sets the GSI routing table entries, overwriting any previously set entries.
 Sets the GSI routing table entries, overwriting any previously set entries.
 
 
+On arm/arm64, GSI routing has the following limitation:
+- GSI routing does not apply to KVM_IRQ_LINE but only to KVM_IRQFD.
+
 struct kvm_irq_routing {
 struct kvm_irq_routing {
 	__u32 nr;
 	__u32 nr;
 	__u32 flags;
 	__u32 flags;
@@ -1468,7 +1471,13 @@ struct kvm_irq_routing_entry {
 #define KVM_IRQ_ROUTING_S390_ADAPTER 3
 #define KVM_IRQ_ROUTING_S390_ADAPTER 3
 #define KVM_IRQ_ROUTING_HV_SINT 4
 #define KVM_IRQ_ROUTING_HV_SINT 4
 
 
-No flags are specified so far, the corresponding field must be set to zero.
+flags:
+- KVM_MSI_VALID_DEVID: used along with KVM_IRQ_ROUTING_MSI routing entry
+  type, specifies that the devid field contains a valid value.  The per-VM
+  KVM_CAP_MSI_DEVID capability advertises the requirement to provide
+  the device ID.  If this capability is not available, userspace should
+  never set the KVM_MSI_VALID_DEVID flag as the ioctl might fail.
+- zero otherwise
 
 
 struct kvm_irq_routing_irqchip {
 struct kvm_irq_routing_irqchip {
 	__u32 irqchip;
 	__u32 irqchip;
@@ -1479,9 +1488,16 @@ struct kvm_irq_routing_msi {
 	__u32 address_lo;
 	__u32 address_lo;
 	__u32 address_hi;
 	__u32 address_hi;
 	__u32 data;
 	__u32 data;
-	__u32 pad;
+	union {
+		__u32 pad;
+		__u32 devid;
+	};
 };
 };
 
 
+If KVM_MSI_VALID_DEVID is set, devid contains a unique device identifier
+for the device that wrote the MSI message.  For PCI, this is usually a
+BDF identifier in the lower 16 bits.
+
 On x86, address_hi is ignored unless the KVM_X2APIC_API_USE_32BIT_IDS
 On x86, address_hi is ignored unless the KVM_X2APIC_API_USE_32BIT_IDS
 feature of KVM_CAP_X2APIC_API capability is enabled.  If it is enabled,
 feature of KVM_CAP_X2APIC_API capability is enabled.  If it is enabled,
 address_hi bits 31-8 provide bits 31-8 of the destination id.  Bits 7-0 of
 address_hi bits 31-8 provide bits 31-8 of the destination id.  Bits 7-0 of
@@ -2199,18 +2215,19 @@ struct kvm_msi {
 	__u8  pad[12];
 	__u8  pad[12];
 };
 };
 
 
-flags: KVM_MSI_VALID_DEVID: devid contains a valid value
-devid: If KVM_MSI_VALID_DEVID is set, contains a unique device identifier
-       for the device that wrote the MSI message.
-       For PCI, this is usually a BFD identifier in the lower 16 bits.
+flags: KVM_MSI_VALID_DEVID: devid contains a valid value.  The per-VM
+  KVM_CAP_MSI_DEVID capability advertises the requirement to provide
+  the device ID.  If this capability is not available, userspace
+  should never set the KVM_MSI_VALID_DEVID flag as the ioctl might fail.
 
 
-The per-VM KVM_CAP_MSI_DEVID capability advertises the need to provide
-the device ID. If this capability is not set, userland cannot rely on
-the kernel to allow the KVM_MSI_VALID_DEVID flag being set.
+If KVM_MSI_VALID_DEVID is set, devid contains a unique device identifier
+for the device that wrote the MSI message.  For PCI, this is usually a
+BDF identifier in the lower 16 bits.
 
 
-On x86, address_hi is ignored unless the KVM_CAP_X2APIC_API capability is
-enabled.  If it is enabled, address_hi bits 31-8 provide bits 31-8 of the
-destination id.  Bits 7-0 of address_hi must be zero.
+On x86, address_hi is ignored unless the KVM_X2APIC_API_USE_32BIT_IDS
+feature of KVM_CAP_X2APIC_API capability is enabled.  If it is enabled,
+address_hi bits 31-8 provide bits 31-8 of the destination id.  Bits 7-0 of
+address_hi must be zero.
 
 
 
 
 4.71 KVM_CREATE_PIT2
 4.71 KVM_CREATE_PIT2
@@ -2383,9 +2400,13 @@ Note that closing the resamplefd is not sufficient to disable the
 irqfd.  The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment
 irqfd.  The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment
 and need not be specified with KVM_IRQFD_FLAG_DEASSIGN.
 and need not be specified with KVM_IRQFD_FLAG_DEASSIGN.
 
 
-On ARM/ARM64, the gsi field in the kvm_irqfd struct specifies the Shared
-Peripheral Interrupt (SPI) index, such that the GIC interrupt ID is
-given by gsi + 32.
+On arm/arm64, where gsi routing is supported, the following can happen:
+- in case no routing entry is associated with this gsi, injection fails
+- in case the gsi is associated with an irqchip routing entry,
+  irqchip.pin + 32 corresponds to the injected SPI ID.
+- in case the gsi is associated with an MSI routing entry, the MSI
+  message and device ID are translated into an LPI (support restricted
+  to GICv3 ITS in-kernel emulation).
 
 
 4.76 KVM_PPC_ALLOCATE_HTAB
 4.76 KVM_PPC_ALLOCATE_HTAB
 
 

+ 2 - 0
arch/arm/kvm/Kconfig

@@ -32,6 +32,8 @@ config KVM
 	select KVM_VFIO
 	select KVM_VFIO
 	select HAVE_KVM_EVENTFD
 	select HAVE_KVM_EVENTFD
 	select HAVE_KVM_IRQFD
 	select HAVE_KVM_IRQFD
+	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQ_ROUTING
 	depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
 	depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
 	---help---
 	---help---
 	  Support hosting virtualized guest machines.
 	  Support hosting virtualized guest machines.

+ 1 - 0
arch/arm/kvm/Makefile

@@ -29,4 +29,5 @@ obj-y += $(KVM)/arm/vgic/vgic-v2.o
 obj-y += $(KVM)/arm/vgic/vgic-mmio.o
 obj-y += $(KVM)/arm/vgic/vgic-mmio.o
 obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o
 obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o
 obj-y += $(KVM)/arm/vgic/vgic-kvm-device.o
 obj-y += $(KVM)/arm/vgic/vgic-kvm-device.o
+obj-y += $(KVM)/irqchip.o
 obj-y += $(KVM)/arm/arch_timer.o
 obj-y += $(KVM)/arm/arch_timer.o

+ 19 - 0
arch/arm/kvm/irq.h

@@ -0,0 +1,19 @@
+/*
+ * irq.h: in kernel interrupt controller related definitions
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This header is included by irqchip.c. However, on ARM, interrupt
+ * controller declarations are located in include/kvm/arm_vgic.h since
+ * they are mostly shared between arm and arm64.
+ */
+
+#ifndef __IRQ_H
+#define __IRQ_H
+
+#include <kvm/arm_vgic.h>
+
+#endif

+ 2 - 0
arch/arm64/kvm/Kconfig

@@ -37,6 +37,8 @@ config KVM
 	select KVM_ARM_VGIC_V3
 	select KVM_ARM_VGIC_V3
 	select KVM_ARM_PMU if HW_PERF_EVENTS
 	select KVM_ARM_PMU if HW_PERF_EVENTS
 	select HAVE_KVM_MSI
 	select HAVE_KVM_MSI
+	select HAVE_KVM_IRQCHIP
+	select HAVE_KVM_IRQ_ROUTING
 	---help---
 	---help---
 	  Support hosting virtualized guest machines.
 	  Support hosting virtualized guest machines.
 	  We don't support KVM with 16K page tables yet, due to the multiple
 	  We don't support KVM with 16K page tables yet, due to the multiple

+ 1 - 0
arch/arm64/kvm/Makefile

@@ -30,5 +30,6 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-its.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-its.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/irqchip.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o
 kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o
 kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o

+ 5 - 7
arch/arm64/kvm/inject_fault.c

@@ -132,16 +132,14 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type)
 static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
 static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr)
 {
 {
 	unsigned long cpsr = *vcpu_cpsr(vcpu);
 	unsigned long cpsr = *vcpu_cpsr(vcpu);
-	bool is_aarch32;
+	bool is_aarch32 = vcpu_mode_is_32bit(vcpu);
 	u32 esr = 0;
 	u32 esr = 0;
 
 
-	is_aarch32 = vcpu_mode_is_32bit(vcpu);
-
-	*vcpu_spsr(vcpu) = cpsr;
 	*vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
 	*vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
-
 	*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
 	*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
+
 	*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
 	*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+	*vcpu_spsr(vcpu) = cpsr;
 
 
 	vcpu_sys_reg(vcpu, FAR_EL1) = addr;
 	vcpu_sys_reg(vcpu, FAR_EL1) = addr;
 
 
@@ -172,11 +170,11 @@ static void inject_undef64(struct kvm_vcpu *vcpu)
 	unsigned long cpsr = *vcpu_cpsr(vcpu);
 	unsigned long cpsr = *vcpu_cpsr(vcpu);
 	u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
 	u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT);
 
 
-	*vcpu_spsr(vcpu) = cpsr;
 	*vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
 	*vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu);
-
 	*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
 	*vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync);
+
 	*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
 	*vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64;
+	*vcpu_spsr(vcpu) = cpsr;
 
 
 	/*
 	/*
 	 * Build an unknown exception, depending on the instruction
 	 * Build an unknown exception, depending on the instruction

+ 19 - 0
arch/arm64/kvm/irq.h

@@ -0,0 +1,19 @@
+/*
+ * irq.h: in kernel interrupt controller related definitions
+ * Copyright (c) 2016 Red Hat, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This header is included by irqchip.c. However, on ARM, interrupt
+ * controller declarations are located in include/kvm/arm_vgic.h since
+ * they are mostly shared between arm and arm64.
+ */
+
+#ifndef __IRQ_H
+#define __IRQ_H
+
+#include <kvm/arm_vgic.h>
+
+#endif

+ 5 - 20
arch/x86/entry/vdso/vclock_gettime.c

@@ -96,9 +96,8 @@ static notrace cycle_t vread_pvclock(int *mode)
 {
 {
 	const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
 	const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti;
 	cycle_t ret;
 	cycle_t ret;
-	u64 tsc, pvti_tsc;
-	u64 last, delta, pvti_system_time;
-	u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift;
+	u64 last;
+	u32 version;
 
 
 	/*
 	/*
 	 * Note: The kernel and hypervisor must guarantee that cpu ID
 	 * Note: The kernel and hypervisor must guarantee that cpu ID
@@ -123,29 +122,15 @@ static notrace cycle_t vread_pvclock(int *mode)
 	 */
 	 */
 
 
 	do {
 	do {
-		version = pvti->version;
-
-		smp_rmb();
+		version = pvclock_read_begin(pvti);
 
 
 		if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
 		if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) {
 			*mode = VCLOCK_NONE;
 			*mode = VCLOCK_NONE;
 			return 0;
 			return 0;
 		}
 		}
 
 
-		tsc = rdtsc_ordered();
-		pvti_tsc_to_system_mul = pvti->tsc_to_system_mul;
-		pvti_tsc_shift = pvti->tsc_shift;
-		pvti_system_time = pvti->system_time;
-		pvti_tsc = pvti->tsc_timestamp;
-
-		/* Make sure that the version double-check is last. */
-		smp_rmb();
-	} while (unlikely((version & 1) || version != pvti->version));
-
-	delta = tsc - pvti_tsc;
-	ret = pvti_system_time +
-		pvclock_scale_delta(delta, pvti_tsc_to_system_mul,
-				    pvti_tsc_shift);
+		ret = __pvclock_read_cycles(pvti);
+	} while (pvclock_read_retry(pvti, version));
 
 
 	/* refer to vread_tsc() comment for rationale */
 	/* refer to vread_tsc() comment for rationale */
 	last = gtod->cycle_last;
 	last = gtod->cycle_last;

+ 23 - 16
arch/x86/include/asm/pvclock.h

@@ -25,6 +25,24 @@ void pvclock_resume(void);
 
 
 void pvclock_touch_watchdogs(void);
 void pvclock_touch_watchdogs(void);
 
 
+static __always_inline
+unsigned pvclock_read_begin(const struct pvclock_vcpu_time_info *src)
+{
+	unsigned version = src->version & ~1;
+	/* Make sure that the version is read before the data. */
+	virt_rmb();
+	return version;
+}
+
+static __always_inline
+bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src,
+			unsigned version)
+{
+	/* Make sure that the version is re-read after the data. */
+	virt_rmb();
+	return unlikely(version != src->version);
+}
+
 /*
 /*
  * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
  * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
  * yielding a 64-bit result.
  * yielding a 64-bit result.
@@ -69,23 +87,12 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift)
 }
 }
 
 
 static __always_inline
 static __always_inline
-unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src,
-			       cycle_t *cycles, u8 *flags)
+cycle_t __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src)
 {
 {
-	unsigned version;
-	cycle_t offset;
-	u64 delta;
-
-	version = src->version;
-	/* Make the latest version visible */
-	smp_rmb();
-
-	delta = rdtsc_ordered() - src->tsc_timestamp;
-	offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
-				   src->tsc_shift);
-	*cycles = src->system_time + offset;
-	*flags = src->flags;
-	return version;
+	u64 delta = rdtsc_ordered() - src->tsc_timestamp;
+	cycle_t offset = pvclock_scale_delta(delta, src->tsc_to_system_mul,
+					     src->tsc_shift);
+	return src->system_time + offset;
 }
 }
 
 
 struct pvclock_vsyscall_time_info {
 struct pvclock_vsyscall_time_info {

+ 6 - 11
arch/x86/kernel/pvclock.c

@@ -64,14 +64,9 @@ u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src)
 	u8 flags;
 	u8 flags;
 
 
 	do {
 	do {
-		version = src->version;
-		/* Make the latest version visible */
-		smp_rmb();
-
+		version = pvclock_read_begin(src);
 		flags = src->flags;
 		flags = src->flags;
-		/* Make sure that the version double-check is last. */
-		smp_rmb();
-	} while ((src->version & 1) || version != src->version);
+	} while (pvclock_read_retry(src, version));
 
 
 	return flags & valid_flags;
 	return flags & valid_flags;
 }
 }
@@ -84,10 +79,10 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src)
 	u8 flags;
 	u8 flags;
 
 
 	do {
 	do {
-		version = __pvclock_read_cycles(src, &ret, &flags);
-		/* Make sure that the version double-check is last. */
-		smp_rmb();
-	} while ((src->version & 1) || version != src->version);
+		version = pvclock_read_begin(src);
+		ret = __pvclock_read_cycles(src);
+		flags = src->flags;
+	} while (pvclock_read_retry(src, version));
 
 
 	if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) {
 	if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) {
 		src->flags &= ~PVCLOCK_GUEST_STOPPED;
 		src->flags &= ~PVCLOCK_GUEST_STOPPED;

+ 3 - 0
arch/x86/kvm/irq.h

@@ -120,4 +120,7 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu);
 
 
 int apic_has_pending_timer(struct kvm_vcpu *vcpu);
 int apic_has_pending_timer(struct kvm_vcpu *vcpu);
 
 
+int kvm_setup_default_irq_routing(struct kvm *kvm);
+int kvm_setup_empty_irq_routing(struct kvm *kvm);
+
 #endif
 #endif

+ 3 - 0
arch/x86/kvm/lapic.c

@@ -1349,6 +1349,9 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
 
 
 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
 {
 {
+	if (!lapic_in_kernel(vcpu))
+		return false;
+
 	return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
 	return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
 }
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
 EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);

+ 7 - 8
arch/x86/kvm/vmx.c

@@ -2809,12 +2809,8 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 			vmx->nested.nested_vmx_ept_caps |=
 			vmx->nested.nested_vmx_ept_caps |=
 				VMX_EPT_EXECUTE_ONLY_BIT;
 				VMX_EPT_EXECUTE_ONLY_BIT;
 		vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept;
 		vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept;
-		/*
-		 * For nested guests, we don't do anything specific
-		 * for single context invalidation. Hence, only advertise
-		 * support for global context invalidation.
-		 */
-		vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT;
+		vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
+			VMX_EPT_EXTENT_CONTEXT_BIT;
 	} else
 	} else
 		vmx->nested.nested_vmx_ept_caps = 0;
 		vmx->nested.nested_vmx_ept_caps = 0;
 
 
@@ -2945,7 +2941,6 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
 			vmx->nested.nested_vmx_secondary_ctls_high);
 			vmx->nested.nested_vmx_secondary_ctls_high);
 		break;
 		break;
 	case MSR_IA32_VMX_EPT_VPID_CAP:
 	case MSR_IA32_VMX_EPT_VPID_CAP:
-		/* Currently, no nested vpid support */
 		*pdata = vmx->nested.nested_vmx_ept_caps |
 		*pdata = vmx->nested.nested_vmx_ept_caps |
 			((u64)vmx->nested.nested_vmx_vpid_caps << 32);
 			((u64)vmx->nested.nested_vmx_vpid_caps << 32);
 		break;
 		break;
@@ -7609,12 +7604,16 @@ static int handle_invept(struct kvm_vcpu *vcpu)
 
 
 	switch (type) {
 	switch (type) {
 	case VMX_EPT_EXTENT_GLOBAL:
 	case VMX_EPT_EXTENT_GLOBAL:
+	/*
+	 * TODO: track mappings and invalidate
+	 * single context requests appropriately
+	 */
+	case VMX_EPT_EXTENT_CONTEXT:
 		kvm_mmu_sync_roots(vcpu);
 		kvm_mmu_sync_roots(vcpu);
 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 		kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu);
 		nested_vmx_succeed(vcpu);
 		nested_vmx_succeed(vcpu);
 		break;
 		break;
 	default:
 	default:
-		/* Trap single context invalidation invept calls */
 		BUG_ON(1);
 		BUG_ON(1);
 		break;
 		break;
 	}
 	}

+ 7 - 0
include/kvm/arm_vgic.h

@@ -34,6 +34,7 @@
 #define VGIC_MAX_SPI		1019
 #define VGIC_MAX_SPI		1019
 #define VGIC_MAX_RESERVED	1023
 #define VGIC_MAX_RESERVED	1023
 #define VGIC_MIN_LPI		8192
 #define VGIC_MIN_LPI		8192
+#define KVM_IRQCHIP_NUM_PINS	(1020 - 32)
 
 
 enum vgic_type {
 enum vgic_type {
 	VGIC_V2,		/* Good ol' GICv2 */
 	VGIC_V2,		/* Good ol' GICv2 */
@@ -314,4 +315,10 @@ static inline int kvm_vgic_get_max_vcpus(void)
 
 
 int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
 int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
 
 
+/**
+ * kvm_vgic_setup_default_irq_routing:
+ * Setup a default flat gsi routing table mapping all SPIs
+ */
+int kvm_vgic_setup_default_irq_routing(struct kvm *kvm);
+
 #endif /* __KVM_ARM_VGIC_H */
 #endif /* __KVM_ARM_VGIC_H */

+ 9 - 3
include/linux/kvm_host.h

@@ -317,7 +317,13 @@ struct kvm_kernel_irq_routing_entry {
 			unsigned irqchip;
 			unsigned irqchip;
 			unsigned pin;
 			unsigned pin;
 		} irqchip;
 		} irqchip;
-		struct msi_msg msi;
+		struct {
+			u32 address_lo;
+			u32 address_hi;
+			u32 data;
+			u32 flags;
+			u32 devid;
+		} msi;
 		struct kvm_s390_adapter_int adapter;
 		struct kvm_s390_adapter_int adapter;
 		struct kvm_hv_sint hv_sint;
 		struct kvm_hv_sint hv_sint;
 	};
 	};
@@ -1003,12 +1009,12 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
 
 
 #ifdef CONFIG_S390
 #ifdef CONFIG_S390
 #define KVM_MAX_IRQ_ROUTES 4096 //FIXME: we can have more than that...
 #define KVM_MAX_IRQ_ROUTES 4096 //FIXME: we can have more than that...
+#elif defined(CONFIG_ARM64)
+#define KVM_MAX_IRQ_ROUTES 4096
 #else
 #else
 #define KVM_MAX_IRQ_ROUTES 1024
 #define KVM_MAX_IRQ_ROUTES 1024
 #endif
 #endif
 
 
-int kvm_setup_default_irq_routing(struct kvm *kvm);
-int kvm_setup_empty_irq_routing(struct kvm *kvm);
 int kvm_set_irq_routing(struct kvm *kvm,
 int kvm_set_irq_routing(struct kvm *kvm,
 			const struct kvm_irq_routing_entry *entries,
 			const struct kvm_irq_routing_entry *entries,
 			unsigned nr,
 			unsigned nr,

+ 4 - 1
include/uapi/linux/kvm.h

@@ -882,7 +882,10 @@ struct kvm_irq_routing_msi {
 	__u32 address_lo;
 	__u32 address_lo;
 	__u32 address_hi;
 	__u32 address_hi;
 	__u32 data;
 	__u32 data;
-	__u32 pad;
+	union {
+		__u32 pad;
+		__u32 devid;
+	};
 };
 };
 
 
 struct kvm_irq_routing_s390_adapter {
 struct kvm_irq_routing_s390_adapter {

+ 4 - 0
virt/kvm/arm/vgic/vgic-init.c

@@ -264,6 +264,10 @@ int vgic_init(struct kvm *kvm)
 	kvm_for_each_vcpu(i, vcpu, kvm)
 	kvm_for_each_vcpu(i, vcpu, kvm)
 		kvm_vgic_vcpu_init(vcpu);
 		kvm_vgic_vcpu_init(vcpu);
 
 
+	ret = kvm_vgic_setup_default_irq_routing(kvm);
+	if (ret)
+		goto out;
+
 	dist->initialized = true;
 	dist->initialized = true;
 out:
 out:
 	return ret;
 	return ret;

+ 98 - 18
virt/kvm/arm/vgic/vgic-irqfd.c

@@ -17,36 +17,116 @@
 #include <linux/kvm.h>
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <linux/kvm_host.h>
 #include <trace/events/kvm.h>
 #include <trace/events/kvm.h>
+#include <kvm/arm_vgic.h>
+#include "vgic.h"
 
 
-int kvm_irq_map_gsi(struct kvm *kvm,
-		    struct kvm_kernel_irq_routing_entry *entries,
-		    int gsi)
+/**
+ * vgic_irqfd_set_irq: inject the IRQ corresponding to the
+ * irqchip routing entry
+ *
+ * This is the entry point for irqfd IRQ injection
+ */
+static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e,
+			struct kvm *kvm, int irq_source_id,
+			int level, bool line_status)
 {
 {
-	return 0;
+	unsigned int spi_id = e->irqchip.pin + VGIC_NR_PRIVATE_IRQS;
+
+	if (!vgic_valid_spi(kvm, spi_id))
+		return -EINVAL;
+	return kvm_vgic_inject_irq(kvm, 0, spi_id, level);
 }
 }
 
 
-int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned int irqchip,
-			 unsigned int pin)
+/**
+ * kvm_set_routing_entry: populate a kvm routing entry
+ * from a user routing entry
+ *
+ * @kvm: the VM this entry is applied to
+ * @e: kvm kernel routing entry handle
+ * @ue: user api routing entry handle
+ * return 0 on success, -EINVAL on errors.
+ */
+#ifdef KVM_CAP_X2APIC_API
+int kvm_set_routing_entry(struct kvm *kvm,
+			  struct kvm_kernel_irq_routing_entry *e,
+			  const struct kvm_irq_routing_entry *ue)
+#else
+/* Remove this version and the ifdefery once merged into 4.8 */
+int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e,
+			  const struct kvm_irq_routing_entry *ue)
+#endif
 {
 {
-	return pin;
+	int r = -EINVAL;
+
+	switch (ue->type) {
+	case KVM_IRQ_ROUTING_IRQCHIP:
+		e->set = vgic_irqfd_set_irq;
+		e->irqchip.irqchip = ue->u.irqchip.irqchip;
+		e->irqchip.pin = ue->u.irqchip.pin;
+		if ((e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) ||
+		    (e->irqchip.irqchip >= KVM_NR_IRQCHIPS))
+			goto out;
+		break;
+	case KVM_IRQ_ROUTING_MSI:
+		e->set = kvm_set_msi;
+		e->msi.address_lo = ue->u.msi.address_lo;
+		e->msi.address_hi = ue->u.msi.address_hi;
+		e->msi.data = ue->u.msi.data;
+		e->msi.flags = ue->flags;
+		e->msi.devid = ue->u.msi.devid;
+		break;
+	default:
+		goto out;
+	}
+	r = 0;
+out:
+	return r;
 }
 }
 
 
-int kvm_set_irq(struct kvm *kvm, int irq_source_id,
-		u32 irq, int level, bool line_status)
+/**
+ * kvm_set_msi: inject the MSI corresponding to the
+ * MSI routing entry
+ *
+ * This is the entry point for irqfd MSI injection
+ * and userspace MSI injection.
+ */
+int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
+		struct kvm *kvm, int irq_source_id,
+		int level, bool line_status)
 {
 {
-	unsigned int spi = irq + VGIC_NR_PRIVATE_IRQS;
+	struct kvm_msi msi;
 
 
-	trace_kvm_set_irq(irq, level, irq_source_id);
+	msi.address_lo = e->msi.address_lo;
+	msi.address_hi = e->msi.address_hi;
+	msi.data = e->msi.data;
+	msi.flags = e->msi.flags;
+	msi.devid = e->msi.devid;
 
 
-	BUG_ON(!vgic_initialized(kvm));
+	if (!vgic_has_its(kvm))
+		return -ENODEV;
 
 
-	return kvm_vgic_inject_irq(kvm, 0, spi, level);
+	return vgic_its_inject_msi(kvm, &msi);
 }
 }
 
 
-/* MSI not implemented yet */
-int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e,
-		struct kvm *kvm, int irq_source_id,
-		int level, bool line_status)
+int kvm_vgic_setup_default_irq_routing(struct kvm *kvm)
 {
 {
-	return 0;
+	struct kvm_irq_routing_entry *entries;
+	struct vgic_dist *dist = &kvm->arch.vgic;
+	u32 nr = dist->nr_spis;
+	int i, ret;
+
+	entries = kcalloc(nr, sizeof(struct kvm_kernel_irq_routing_entry),
+			  GFP_KERNEL);
+	if (!entries)
+		return -ENOMEM;
+
+	for (i = 0; i < nr; i++) {
+		entries[i].gsi = i;
+		entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
+		entries[i].u.irqchip.irqchip = 0;
+		entries[i].u.irqchip.pin = i;
+	}
+	ret = kvm_set_irq_routing(kvm, entries, nr, 0);
+	kfree(entries);
+	return ret;
 }
 }

+ 0 - 7
virt/kvm/arm/vgic/vgic.c

@@ -711,10 +711,3 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq)
 	return map_is_active;
 	return map_is_active;
 }
 }
 
 
-int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
-{
-	if (vgic_has_its(kvm))
-		return vgic_its_inject_msi(kvm, msi);
-	else
-		return -ENODEV;
-}

+ 18 - 10
virt/kvm/irqchip.c

@@ -62,12 +62,14 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi)
 {
 {
 	struct kvm_kernel_irq_routing_entry route;
 	struct kvm_kernel_irq_routing_entry route;
 
 
-	if (!irqchip_in_kernel(kvm) || msi->flags != 0)
+	if (!irqchip_in_kernel(kvm) || (msi->flags & ~KVM_MSI_VALID_DEVID))
 		return -EINVAL;
 		return -EINVAL;
 
 
 	route.msi.address_lo = msi->address_lo;
 	route.msi.address_lo = msi->address_lo;
 	route.msi.address_hi = msi->address_hi;
 	route.msi.address_hi = msi->address_hi;
 	route.msi.data = msi->data;
 	route.msi.data = msi->data;
+	route.msi.flags = msi->flags;
+	route.msi.devid = msi->devid;
 
 
 	return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false);
 	return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false);
 }
 }
@@ -177,6 +179,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
 			unsigned flags)
 			unsigned flags)
 {
 {
 	struct kvm_irq_routing_table *new, *old;
 	struct kvm_irq_routing_table *new, *old;
+	struct kvm_kernel_irq_routing_entry *e;
 	u32 i, j, nr_rt_entries = 0;
 	u32 i, j, nr_rt_entries = 0;
 	int r;
 	int r;
 
 
@@ -200,23 +203,25 @@ int kvm_set_irq_routing(struct kvm *kvm,
 			new->chip[i][j] = -1;
 			new->chip[i][j] = -1;
 
 
 	for (i = 0; i < nr; ++i) {
 	for (i = 0; i < nr; ++i) {
-		struct kvm_kernel_irq_routing_entry *e;
-
 		r = -ENOMEM;
 		r = -ENOMEM;
 		e = kzalloc(sizeof(*e), GFP_KERNEL);
 		e = kzalloc(sizeof(*e), GFP_KERNEL);
 		if (!e)
 		if (!e)
 			goto out;
 			goto out;
 
 
 		r = -EINVAL;
 		r = -EINVAL;
-		if (ue->flags) {
-			kfree(e);
-			goto out;
+		switch (ue->type) {
+		case KVM_IRQ_ROUTING_MSI:
+			if (ue->flags & ~KVM_MSI_VALID_DEVID)
+				goto free_entry;
+			break;
+		default:
+			if (ue->flags)
+				goto free_entry;
+			break;
 		}
 		}
 		r = setup_routing_entry(kvm, new, e, ue);
 		r = setup_routing_entry(kvm, new, e, ue);
-		if (r) {
-			kfree(e);
-			goto out;
-		}
+		if (r)
+			goto free_entry;
 		++ue;
 		++ue;
 	}
 	}
 
 
@@ -233,7 +238,10 @@ int kvm_set_irq_routing(struct kvm *kvm,
 
 
 	new = old;
 	new = old;
 	r = 0;
 	r = 0;
+	goto out;
 
 
+free_entry:
+	kfree(e);
 out:
 out:
 	free_irq_routing_table(new);
 	free_irq_routing_table(new);