|
@@ -61,12 +61,37 @@ module_param_named(unrestricted_guest,
|
|
|
static int __read_mostly emulate_invalid_guest_state = 0;
|
|
|
module_param(emulate_invalid_guest_state, bool, S_IRUGO);
|
|
|
|
|
|
+/*
|
|
|
+ * These two module parameters configure the controls for Pause-Loop Exiting:
|
|
|
+ * ple_gap: upper bound on the amount of time between two successive
|
|
|
+ *             executions of PAUSE in a loop. Also indicates whether PLE is enabled.
|
|
|
+ *             According to tests, this time is usually smaller than 41 cycles.
|
|
|
+ * ple_window: upper bound on the amount of time a guest is allowed to execute
|
|
|
+ * in a PAUSE loop. Tests indicate that most spinlocks are held for
|
|
|
+ *             less than 2^12 cycles.
|
|
|
+ * Time is measured based on a counter that runs at the same rate as the TSC,
|
|
|
+ * refer to SDM volume 3B, sections 21.6.13 and 22.1.3.
|
|
|
+ */
|
|
|
+#define KVM_VMX_DEFAULT_PLE_GAP 41
|
|
|
+#define KVM_VMX_DEFAULT_PLE_WINDOW 4096
|
|
|
+static int ple_gap = KVM_VMX_DEFAULT_PLE_GAP;
|
|
|
+module_param(ple_gap, int, S_IRUGO);
|
|
|
+
|
|
|
+static int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW;
|
|
|
+module_param(ple_window, int, S_IRUGO);
|
|
|
+
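The block above sets the PLE defaults and exposes them read-only. As a rough illustration of the gap/window semantics the comment describes, here is a minimal user-space sketch; the constants mirror the defaults above, and the detection loop is only an interpretation of the SDM's counter model, not kernel code.

#include <stdio.h>

#define PLE_GAP     41          /* max cycles between PAUSEs that still count as one loop */
#define PLE_WINDOW  4096        /* total spinning tolerated before forcing an exit */

/* pause_tsc[] holds the TSC value observed at each executed PAUSE, in order. */
static int ple_should_exit(const unsigned long long *pause_tsc, int n)
{
        unsigned long long window_start = pause_tsc[0];
        int i;

        for (i = 1; i < n; i++) {
                if (pause_tsc[i] - pause_tsc[i - 1] > PLE_GAP)
                        window_start = pause_tsc[i];    /* left the loop: restart window */
                else if (pause_tsc[i] - window_start > PLE_WINDOW)
                        return 1;                       /* spun too long: PLE exit */
        }
        return 0;
}

int main(void)
{
        unsigned long long trace[200];
        int i;

        for (i = 0; i < 200; i++)
                trace[i] = 30ULL * i;   /* PAUSEs 30 cycles apart: a tight spin loop */

        printf("PAUSE-loop exit: %d\n", ple_should_exit(trace, 200));
        return 0;
}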
|
|
|
struct vmcs {
|
|
|
u32 revision_id;
|
|
|
u32 abort;
|
|
|
char data[0];
|
|
|
};
|
|
|
|
|
|
+struct shared_msr_entry {
|
|
|
+ unsigned index;
|
|
|
+ u64 data;
|
|
|
+ u64 mask;
|
|
|
+};
|
|
|
+
|
|
|
struct vcpu_vmx {
|
|
|
struct kvm_vcpu vcpu;
|
|
|
struct list_head local_vcpus_link;
|
|
@@ -74,13 +99,12 @@ struct vcpu_vmx {
|
|
|
int launched;
|
|
|
u8 fail;
|
|
|
u32 idt_vectoring_info;
|
|
|
- struct kvm_msr_entry *guest_msrs;
|
|
|
- struct kvm_msr_entry *host_msrs;
|
|
|
+ struct shared_msr_entry *guest_msrs;
|
|
|
int nmsrs;
|
|
|
int save_nmsrs;
|
|
|
- int msr_offset_efer;
|
|
|
#ifdef CONFIG_X86_64
|
|
|
- int msr_offset_kernel_gs_base;
|
|
|
+ u64 msr_host_kernel_gs_base;
|
|
|
+ u64 msr_guest_kernel_gs_base;
|
|
|
#endif
|
|
|
struct vmcs *vmcs;
|
|
|
struct {
|
|
@@ -88,7 +112,6 @@ struct vcpu_vmx {
|
|
|
u16 fs_sel, gs_sel, ldt_sel;
|
|
|
int gs_ldt_reload_needed;
|
|
|
int fs_reload_needed;
|
|
|
- int guest_efer_loaded;
|
|
|
} host_state;
|
|
|
struct {
|
|
|
int vm86_active;
|
|
@@ -107,7 +130,6 @@ struct vcpu_vmx {
|
|
|
} rmode;
|
|
|
int vpid;
|
|
|
bool emulation_required;
|
|
|
- enum emulation_result invalid_state_emulation_result;
|
|
|
|
|
|
/* Support for vnmi-less CPUs */
|
|
|
int soft_vnmi_blocked;
|
|
@@ -176,6 +198,8 @@ static struct kvm_vmx_segment_field {
|
|
|
VMX_SEGMENT_FIELD(LDTR),
|
|
|
};
|
|
|
|
|
|
+static u64 host_efer;
|
|
|
+
|
|
|
static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
|
|
|
|
|
|
/*
|
|
@@ -184,28 +208,12 @@ static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
|
|
|
*/
|
|
|
static const u32 vmx_msr_index[] = {
|
|
|
#ifdef CONFIG_X86_64
|
|
|
- MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR, MSR_KERNEL_GS_BASE,
|
|
|
+ MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
|
|
|
#endif
|
|
|
MSR_EFER, MSR_K6_STAR,
|
|
|
};
|
|
|
#define NR_VMX_MSR ARRAY_SIZE(vmx_msr_index)
|
|
|
|
|
|
-static void load_msrs(struct kvm_msr_entry *e, int n)
|
|
|
-{
|
|
|
- int i;
|
|
|
-
|
|
|
- for (i = 0; i < n; ++i)
|
|
|
- wrmsrl(e[i].index, e[i].data);
|
|
|
-}
|
|
|
-
|
|
|
-static void save_msrs(struct kvm_msr_entry *e, int n)
|
|
|
-{
|
|
|
- int i;
|
|
|
-
|
|
|
- for (i = 0; i < n; ++i)
|
|
|
- rdmsrl(e[i].index, e[i].data);
|
|
|
-}
|
|
|
-
|
|
|
static inline int is_page_fault(u32 intr_info)
|
|
|
{
|
|
|
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
|
|
@@ -320,6 +328,12 @@ static inline int cpu_has_vmx_unrestricted_guest(void)
|
|
|
SECONDARY_EXEC_UNRESTRICTED_GUEST;
|
|
|
}
|
|
|
|
|
|
+static inline int cpu_has_vmx_ple(void)
|
|
|
+{
|
|
|
+ return vmcs_config.cpu_based_2nd_exec_ctrl &
|
|
|
+ SECONDARY_EXEC_PAUSE_LOOP_EXITING;
|
|
|
+}
|
|
|
+
|
|
|
static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
|
|
|
{
|
|
|
return flexpriority_enabled &&
|
|
@@ -348,7 +362,7 @@ static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
|
|
|
int i;
|
|
|
|
|
|
for (i = 0; i < vmx->nmsrs; ++i)
|
|
|
- if (vmx->guest_msrs[i].index == msr)
|
|
|
+ if (vmx_msr_index[vmx->guest_msrs[i].index] == msr)
|
|
|
return i;
|
|
|
return -1;
|
|
|
}
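With this change guest_msrs[i].index holds a slot into vmx_msr_index[] rather than the raw MSR number, so lookups go through that table. A small stand-alone sketch of the indirection, with made-up slot contents:

#include <stdio.h>

#define MSR_EFER        0xc0000080u
#define MSR_K6_STAR     0xc0000081u

static const unsigned int vmx_msr_index[] = { MSR_EFER, MSR_K6_STAR };

struct shared_msr_entry {
        unsigned index;                 /* slot into vmx_msr_index[], not an MSR number */
        unsigned long long data, mask;
};

static int find_index(const struct shared_msr_entry *msrs, int n, unsigned int msr)
{
        int i;

        for (i = 0; i < n; i++)
                if (vmx_msr_index[msrs[i].index] == msr)        /* translate slot -> MSR */
                        return i;
        return -1;
}

int main(void)
{
        /* STAR was moved to the front, EFER sits second. */
        struct shared_msr_entry msrs[2] = { { 1, 0, 0 }, { 0, 0, 0 } };

        printf("MSR_EFER lives at guest_msrs[%d]\n", find_index(msrs, 2, MSR_EFER));
        return 0;
}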
|
|
@@ -379,7 +393,7 @@ static inline void __invept(int ext, u64 eptp, gpa_t gpa)
|
|
|
: : "a" (&operand), "c" (ext) : "cc", "memory");
|
|
|
}
|
|
|
|
|
|
-static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
|
|
|
+static struct shared_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
|
|
|
{
|
|
|
int i;
|
|
|
|
|
@@ -570,17 +584,12 @@ static void reload_tss(void)
|
|
|
load_TR_desc();
|
|
|
}
|
|
|
|
|
|
-static void load_transition_efer(struct vcpu_vmx *vmx)
|
|
|
+static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset)
|
|
|
{
|
|
|
- int efer_offset = vmx->msr_offset_efer;
|
|
|
- u64 host_efer;
|
|
|
u64 guest_efer;
|
|
|
u64 ignore_bits;
|
|
|
|
|
|
- if (efer_offset < 0)
|
|
|
- return;
|
|
|
- host_efer = vmx->host_msrs[efer_offset].data;
|
|
|
- guest_efer = vmx->guest_msrs[efer_offset].data;
|
|
|
+ guest_efer = vmx->vcpu.arch.shadow_efer;
|
|
|
|
|
|
/*
|
|
|
* NX is emulated; LMA and LME handled by hardware; SCE meaninless
|
|
@@ -593,27 +602,17 @@ static void load_transition_efer(struct vcpu_vmx *vmx)
|
|
|
if (guest_efer & EFER_LMA)
|
|
|
ignore_bits &= ~(u64)EFER_SCE;
|
|
|
#endif
|
|
|
- if ((guest_efer & ~ignore_bits) == (host_efer & ~ignore_bits))
|
|
|
- return;
|
|
|
-
|
|
|
- vmx->host_state.guest_efer_loaded = 1;
|
|
|
guest_efer &= ~ignore_bits;
|
|
|
guest_efer |= host_efer & ignore_bits;
|
|
|
- wrmsrl(MSR_EFER, guest_efer);
|
|
|
- vmx->vcpu.stat.efer_reload++;
|
|
|
-}
|
|
|
-
|
|
|
-static void reload_host_efer(struct vcpu_vmx *vmx)
|
|
|
-{
|
|
|
- if (vmx->host_state.guest_efer_loaded) {
|
|
|
- vmx->host_state.guest_efer_loaded = 0;
|
|
|
- load_msrs(vmx->host_msrs + vmx->msr_offset_efer, 1);
|
|
|
- }
|
|
|
+ vmx->guest_msrs[efer_offset].data = guest_efer;
|
|
|
+ vmx->guest_msrs[efer_offset].mask = ~ignore_bits;
|
|
|
+ return true;
|
|
|
}
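The masking above decides in which EFER bits the value loaded for the guest may differ from the host's. A stand-alone sketch of that arithmetic, assuming a 64-bit host and the architectural EFER bit positions; the input values are invented:

#include <stdint.h>
#include <stdio.h>

#define EFER_SCE (1ULL << 0)
#define EFER_LME (1ULL << 8)
#define EFER_LMA (1ULL << 10)
#define EFER_NX  (1ULL << 11)

/* Value the hardware MSR should hold while the guest runs. */
static uint64_t transition_efer(uint64_t guest_efer, uint64_t host_efer)
{
        /* NX is emulated, LMA/LME are handled by hardware, SCE only matters
         * in long mode, so the host's settings can be kept for those bits. */
        uint64_t ignore_bits = EFER_NX | EFER_SCE | EFER_LMA | EFER_LME;

        if (guest_efer & EFER_LMA)      /* SCE is meaningful in long mode */
                ignore_bits &= ~EFER_SCE;

        return (guest_efer & ~ignore_bits) | (host_efer & ignore_bits);
}

int main(void)
{
        uint64_t host  = EFER_SCE | EFER_LME | EFER_LMA | EFER_NX;
        uint64_t guest = EFER_SCE;      /* a 32-bit guest with syscall enabled */

        printf("MSR_EFER while guest runs: %#llx\n",
               (unsigned long long)transition_efer(guest, host));
        return 0;
}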
|
|
|
|
|
|
static void vmx_save_host_state(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
+ int i;
|
|
|
|
|
|
if (vmx->host_state.loaded)
|
|
|
return;
|
|
@@ -650,13 +649,15 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
|
|
|
#endif
|
|
|
|
|
|
#ifdef CONFIG_X86_64
|
|
|
- if (is_long_mode(&vmx->vcpu))
|
|
|
- save_msrs(vmx->host_msrs +
|
|
|
- vmx->msr_offset_kernel_gs_base, 1);
|
|
|
-
|
|
|
+ if (is_long_mode(&vmx->vcpu)) {
|
|
|
+ rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
|
|
|
+ wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
|
|
|
+ }
|
|
|
#endif
|
|
|
- load_msrs(vmx->guest_msrs, vmx->save_nmsrs);
|
|
|
- load_transition_efer(vmx);
|
|
|
+ for (i = 0; i < vmx->save_nmsrs; ++i)
|
|
|
+ kvm_set_shared_msr(vmx->guest_msrs[i].index,
|
|
|
+ vmx->guest_msrs[i].data,
|
|
|
+ vmx->guest_msrs[i].mask);
|
|
|
}
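Instead of unconditionally writing every guest MSR, the loop above defers to the generic shared-MSR code, which can skip redundant writes. A user-space sketch of that idea, assuming kvm_set_shared_msr() compares against the value currently in hardware under the supplied mask; all names below are illustrative:

#include <stdint.h>
#include <stdio.h>

struct shared_msr {
        uint32_t msr;           /* architectural MSR number */
        uint64_t curr;          /* value currently programmed in hardware */
};

static int wrmsr_count;

static void fake_wrmsr(uint32_t msr, uint64_t value)    /* stand-in for wrmsrl() */
{
        wrmsr_count++;
        printf("wrmsr(%#x, %#llx)\n", msr, (unsigned long long)value);
}

static void set_shared_msr(struct shared_msr *slot, uint64_t value, uint64_t mask)
{
        /* Skip the expensive write when the bits we care about already match. */
        if (((value ^ slot->curr) & mask) == 0)
                return;
        slot->curr = value;
        fake_wrmsr(slot->msr, value);
}

int main(void)
{
        struct shared_msr star = { 0xc0000081u, 0 };    /* MSR_K6_STAR */

        set_shared_msr(&star, 0x123, ~0ULL);    /* first use: must write */
        set_shared_msr(&star, 0x123, ~0ULL);    /* same value: skipped   */
        printf("writes performed: %d\n", wrmsr_count);
        return 0;
}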
|
|
|
|
|
|
static void __vmx_load_host_state(struct vcpu_vmx *vmx)
|
|
@@ -684,9 +685,12 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx)
|
|
|
local_irq_restore(flags);
|
|
|
}
|
|
|
reload_tss();
|
|
|
- save_msrs(vmx->guest_msrs, vmx->save_nmsrs);
|
|
|
- load_msrs(vmx->host_msrs, vmx->save_nmsrs);
|
|
|
- reload_host_efer(vmx);
|
|
|
+#ifdef CONFIG_X86_64
|
|
|
+ if (is_long_mode(&vmx->vcpu)) {
|
|
|
+ rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
|
|
|
+ wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
|
|
|
+ }
|
|
|
+#endif
|
|
|
}
|
|
|
|
|
|
static void vmx_load_host_state(struct vcpu_vmx *vmx)
|
|
@@ -877,19 +881,14 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
|
|
|
/*
|
|
|
* Swap MSR entry in host/guest MSR entry array.
|
|
|
*/
|
|
|
-#ifdef CONFIG_X86_64
|
|
|
static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
|
|
|
{
|
|
|
- struct kvm_msr_entry tmp;
|
|
|
+ struct shared_msr_entry tmp;
|
|
|
|
|
|
tmp = vmx->guest_msrs[to];
|
|
|
vmx->guest_msrs[to] = vmx->guest_msrs[from];
|
|
|
vmx->guest_msrs[from] = tmp;
|
|
|
- tmp = vmx->host_msrs[to];
|
|
|
- vmx->host_msrs[to] = vmx->host_msrs[from];
|
|
|
- vmx->host_msrs[from] = tmp;
|
|
|
}
|
|
|
-#endif
|
|
|
|
|
|
/*
|
|
|
* Set up the vmcs to automatically save and restore system
|
|
@@ -898,15 +897,13 @@ static void move_msr_up(struct vcpu_vmx *vmx, int from, int to)
|
|
|
*/
|
|
|
static void setup_msrs(struct vcpu_vmx *vmx)
|
|
|
{
|
|
|
- int save_nmsrs;
|
|
|
+ int save_nmsrs, index;
|
|
|
unsigned long *msr_bitmap;
|
|
|
|
|
|
vmx_load_host_state(vmx);
|
|
|
save_nmsrs = 0;
|
|
|
#ifdef CONFIG_X86_64
|
|
|
if (is_long_mode(&vmx->vcpu)) {
|
|
|
- int index;
|
|
|
-
|
|
|
index = __find_msr_index(vmx, MSR_SYSCALL_MASK);
|
|
|
if (index >= 0)
|
|
|
move_msr_up(vmx, index, save_nmsrs++);
|
|
@@ -914,9 +911,6 @@ static void setup_msrs(struct vcpu_vmx *vmx)
|
|
|
if (index >= 0)
|
|
|
move_msr_up(vmx, index, save_nmsrs++);
|
|
|
index = __find_msr_index(vmx, MSR_CSTAR);
|
|
|
- if (index >= 0)
|
|
|
- move_msr_up(vmx, index, save_nmsrs++);
|
|
|
- index = __find_msr_index(vmx, MSR_KERNEL_GS_BASE);
|
|
|
if (index >= 0)
|
|
|
move_msr_up(vmx, index, save_nmsrs++);
|
|
|
/*
|
|
@@ -928,13 +922,11 @@ static void setup_msrs(struct vcpu_vmx *vmx)
|
|
|
move_msr_up(vmx, index, save_nmsrs++);
|
|
|
}
|
|
|
#endif
|
|
|
- vmx->save_nmsrs = save_nmsrs;
|
|
|
+ index = __find_msr_index(vmx, MSR_EFER);
|
|
|
+ if (index >= 0 && update_transition_efer(vmx, index))
|
|
|
+ move_msr_up(vmx, index, save_nmsrs++);
|
|
|
|
|
|
-#ifdef CONFIG_X86_64
|
|
|
- vmx->msr_offset_kernel_gs_base =
|
|
|
- __find_msr_index(vmx, MSR_KERNEL_GS_BASE);
|
|
|
-#endif
|
|
|
- vmx->msr_offset_efer = __find_msr_index(vmx, MSR_EFER);
|
|
|
+ vmx->save_nmsrs = save_nmsrs;
|
|
|
|
|
|
if (cpu_has_vmx_msr_bitmap()) {
|
|
|
if (is_long_mode(&vmx->vcpu))
|
|
@@ -976,7 +968,7 @@ static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
|
|
|
static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
|
|
|
{
|
|
|
u64 data;
|
|
|
- struct kvm_msr_entry *msr;
|
|
|
+ struct shared_msr_entry *msr;
|
|
|
|
|
|
if (!pdata) {
|
|
|
printk(KERN_ERR "BUG: get_msr called with NULL pdata\n");
|
|
@@ -991,9 +983,13 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
|
|
|
case MSR_GS_BASE:
|
|
|
data = vmcs_readl(GUEST_GS_BASE);
|
|
|
break;
|
|
|
+ case MSR_KERNEL_GS_BASE:
|
|
|
+ vmx_load_host_state(to_vmx(vcpu));
|
|
|
+ data = to_vmx(vcpu)->msr_guest_kernel_gs_base;
|
|
|
+ break;
|
|
|
+#endif
|
|
|
case MSR_EFER:
|
|
|
return kvm_get_msr_common(vcpu, msr_index, pdata);
|
|
|
-#endif
|
|
|
case MSR_IA32_TSC:
|
|
|
data = guest_read_tsc();
|
|
|
break;
|
|
@@ -1007,6 +1003,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
|
|
|
data = vmcs_readl(GUEST_SYSENTER_ESP);
|
|
|
break;
|
|
|
default:
|
|
|
+ vmx_load_host_state(to_vmx(vcpu));
|
|
|
msr = find_msr_entry(to_vmx(vcpu), msr_index);
|
|
|
if (msr) {
|
|
|
vmx_load_host_state(to_vmx(vcpu));
|
|
@@ -1028,7 +1025,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
|
|
|
static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
|
|
|
{
|
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
- struct kvm_msr_entry *msr;
|
|
|
+ struct shared_msr_entry *msr;
|
|
|
u64 host_tsc;
|
|
|
int ret = 0;
|
|
|
|
|
@@ -1044,6 +1041,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
|
|
|
case MSR_GS_BASE:
|
|
|
vmcs_writel(GUEST_GS_BASE, data);
|
|
|
break;
|
|
|
+ case MSR_KERNEL_GS_BASE:
|
|
|
+ vmx_load_host_state(vmx);
|
|
|
+ vmx->msr_guest_kernel_gs_base = data;
|
|
|
+ break;
|
|
|
#endif
|
|
|
case MSR_IA32_SYSENTER_CS:
|
|
|
vmcs_write32(GUEST_SYSENTER_CS, data);
|
|
@@ -1097,30 +1098,14 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg)
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-static int set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
|
|
|
+static void set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg)
|
|
|
{
|
|
|
- int old_debug = vcpu->guest_debug;
|
|
|
- unsigned long flags;
|
|
|
-
|
|
|
- vcpu->guest_debug = dbg->control;
|
|
|
- if (!(vcpu->guest_debug & KVM_GUESTDBG_ENABLE))
|
|
|
- vcpu->guest_debug = 0;
|
|
|
-
|
|
|
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
|
|
|
vmcs_writel(GUEST_DR7, dbg->arch.debugreg[7]);
|
|
|
else
|
|
|
vmcs_writel(GUEST_DR7, vcpu->arch.dr7);
|
|
|
|
|
|
- flags = vmcs_readl(GUEST_RFLAGS);
|
|
|
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
|
|
|
- flags |= X86_EFLAGS_TF | X86_EFLAGS_RF;
|
|
|
- else if (old_debug & KVM_GUESTDBG_SINGLESTEP)
|
|
|
- flags &= ~(X86_EFLAGS_TF | X86_EFLAGS_RF);
|
|
|
- vmcs_writel(GUEST_RFLAGS, flags);
|
|
|
-
|
|
|
update_exception_bitmap(vcpu);
|
|
|
-
|
|
|
- return 0;
|
|
|
}
|
|
|
|
|
|
static __init int cpu_has_kvm_support(void)
|
|
@@ -1139,12 +1124,15 @@ static __init int vmx_disabled_by_bios(void)
|
|
|
/* locked but not enabled */
|
|
|
}
|
|
|
|
|
|
-static void hardware_enable(void *garbage)
|
|
|
+static int hardware_enable(void *garbage)
|
|
|
{
|
|
|
int cpu = raw_smp_processor_id();
|
|
|
u64 phys_addr = __pa(per_cpu(vmxarea, cpu));
|
|
|
u64 old;
|
|
|
|
|
|
+ if (read_cr4() & X86_CR4_VMXE)
|
|
|
+ return -EBUSY;
|
|
|
+
|
|
|
INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
|
|
|
rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
|
|
|
if ((old & (FEATURE_CONTROL_LOCKED |
|
|
@@ -1159,6 +1147,10 @@ static void hardware_enable(void *garbage)
|
|
|
asm volatile (ASM_VMX_VMXON_RAX
|
|
|
: : "a"(&phys_addr), "m"(phys_addr)
|
|
|
: "memory", "cc");
|
|
|
+
|
|
|
+ ept_sync_global();
|
|
|
+
|
|
|
+ return 0;
|
|
|
}
|
|
|
|
|
|
static void vmclear_local_vcpus(void)
|
|
@@ -1250,7 +1242,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
|
|
|
SECONDARY_EXEC_WBINVD_EXITING |
|
|
|
SECONDARY_EXEC_ENABLE_VPID |
|
|
|
SECONDARY_EXEC_ENABLE_EPT |
|
|
|
- SECONDARY_EXEC_UNRESTRICTED_GUEST;
|
|
|
+ SECONDARY_EXEC_UNRESTRICTED_GUEST |
|
|
|
+ SECONDARY_EXEC_PAUSE_LOOP_EXITING;
|
|
|
if (adjust_vmx_controls(min2, opt2,
|
|
|
MSR_IA32_VMX_PROCBASED_CTLS2,
|
|
|
&_cpu_based_2nd_exec_control) < 0)
|
|
@@ -1344,15 +1337,17 @@ static void free_kvm_area(void)
|
|
|
{
|
|
|
int cpu;
|
|
|
|
|
|
- for_each_online_cpu(cpu)
|
|
|
+ for_each_possible_cpu(cpu) {
|
|
|
free_vmcs(per_cpu(vmxarea, cpu));
|
|
|
+ per_cpu(vmxarea, cpu) = NULL;
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
static __init int alloc_kvm_area(void)
|
|
|
{
|
|
|
int cpu;
|
|
|
|
|
|
- for_each_online_cpu(cpu) {
|
|
|
+ for_each_possible_cpu(cpu) {
|
|
|
struct vmcs *vmcs;
|
|
|
|
|
|
vmcs = alloc_vmcs_cpu(cpu);
|
|
@@ -1394,6 +1389,9 @@ static __init int hardware_setup(void)
|
|
|
if (enable_ept && !cpu_has_vmx_ept_2m_page())
|
|
|
kvm_disable_largepages();
|
|
|
|
|
|
+ if (!cpu_has_vmx_ple())
|
|
|
+ ple_gap = 0;
|
|
|
+
|
|
|
return alloc_kvm_area();
|
|
|
}
|
|
|
|
|
@@ -1536,8 +1534,16 @@ continue_rmode:
|
|
|
static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
|
|
|
{
|
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
- struct kvm_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
|
|
|
+ struct shared_msr_entry *msr = find_msr_entry(vmx, MSR_EFER);
|
|
|
+
|
|
|
+ if (!msr)
|
|
|
+ return;
|
|
|
|
|
|
+ /*
|
|
|
+ * Force kernel_gs_base reloading before EFER changes, as control
|
|
|
+ * of this msr depends on is_long_mode().
|
|
|
+ */
|
|
|
+ vmx_load_host_state(to_vmx(vcpu));
|
|
|
vcpu->arch.shadow_efer = efer;
|
|
|
if (!msr)
|
|
|
return;
|
|
@@ -1727,6 +1733,7 @@ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
|
|
|
vmcs_write64(EPT_POINTER, eptp);
|
|
|
guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
|
|
|
vcpu->kvm->arch.ept_identity_map_addr;
|
|
|
+ ept_load_pdptrs(vcpu);
|
|
|
}
|
|
|
|
|
|
vmx_flush_tlb(vcpu);
|
|
@@ -2302,13 +2309,22 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
|
|
|
~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
|
|
|
if (vmx->vpid == 0)
|
|
|
exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
|
|
|
- if (!enable_ept)
|
|
|
+ if (!enable_ept) {
|
|
|
exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
|
|
|
+ enable_unrestricted_guest = 0;
|
|
|
+ }
|
|
|
if (!enable_unrestricted_guest)
|
|
|
exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
|
|
|
+ if (!ple_gap)
|
|
|
+ exec_control &= ~SECONDARY_EXEC_PAUSE_LOOP_EXITING;
|
|
|
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
|
|
|
}
|
|
|
|
|
|
+ if (ple_gap) {
|
|
|
+ vmcs_write32(PLE_GAP, ple_gap);
|
|
|
+ vmcs_write32(PLE_WINDOW, ple_window);
|
|
|
+ }
|
|
|
+
|
|
|
vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, !!bypass_guest_pf);
|
|
|
vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, !!bypass_guest_pf);
|
|
|
vmcs_write32(CR3_TARGET_COUNT, 0); /* 22.2.1 */
|
|
@@ -2376,10 +2392,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
|
|
|
if (wrmsr_safe(index, data_low, data_high) < 0)
|
|
|
continue;
|
|
|
data = data_low | ((u64)data_high << 32);
|
|
|
- vmx->host_msrs[j].index = index;
|
|
|
- vmx->host_msrs[j].reserved = 0;
|
|
|
- vmx->host_msrs[j].data = data;
|
|
|
- vmx->guest_msrs[j] = vmx->host_msrs[j];
|
|
|
+ vmx->guest_msrs[j].index = i;
|
|
|
+ vmx->guest_msrs[j].data = 0;
|
|
|
+ vmx->guest_msrs[j].mask = -1ull;
|
|
|
++vmx->nmsrs;
|
|
|
}
|
|
|
|
|
@@ -2510,7 +2525,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
|
|
|
if (vmx->vpid != 0)
|
|
|
vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
|
|
|
|
|
|
- vmx->vcpu.arch.cr0 = 0x60000010;
|
|
|
+ vmx->vcpu.arch.cr0 = X86_CR0_NW | X86_CR0_CD | X86_CR0_ET;
|
|
|
vmx_set_cr0(&vmx->vcpu, vmx->vcpu.arch.cr0); /* enter rmode */
|
|
|
vmx_set_cr4(&vmx->vcpu, 0);
|
|
|
vmx_set_efer(&vmx->vcpu, 0);
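The symbolic CR0 form above spells out the same number the old 0x60000010 literal encoded. A quick stand-alone check, using the architectural CR0 bit positions (ET is bit 4, NW bit 29, CD bit 30):

#include <assert.h>
#include <stdio.h>

#define X86_CR0_ET (1UL << 4)
#define X86_CR0_NW (1UL << 29)
#define X86_CR0_CD (1UL << 30)

int main(void)
{
        /* 0x60000010 is the architected CR0 value after INIT/RESET. */
        assert((X86_CR0_NW | X86_CR0_CD | X86_CR0_ET) == 0x60000010UL);
        printf("CR0 reset value: %#lx\n", X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
        return 0;
}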
|
|
@@ -2627,6 +2642,34 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
|
|
|
GUEST_INTR_STATE_NMI));
|
|
|
}
|
|
|
|
|
|
+static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
|
|
|
+{
|
|
|
+ if (!cpu_has_virtual_nmis())
|
|
|
+ return to_vmx(vcpu)->soft_vnmi_blocked;
|
|
|
+ else
|
|
|
+ return !!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
|
|
|
+ GUEST_INTR_STATE_NMI);
|
|
|
+}
|
|
|
+
|
|
|
+static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
|
|
|
+{
|
|
|
+ struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
+
|
|
|
+ if (!cpu_has_virtual_nmis()) {
|
|
|
+ if (vmx->soft_vnmi_blocked != masked) {
|
|
|
+ vmx->soft_vnmi_blocked = masked;
|
|
|
+ vmx->vnmi_blocked_time = 0;
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ if (masked)
|
|
|
+ vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
|
|
|
+ GUEST_INTR_STATE_NMI);
|
|
|
+ else
|
|
|
+ vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
|
|
|
+ GUEST_INTR_STATE_NMI);
|
|
|
+ }
|
|
|
+}
|
|
|
+
|
|
|
static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
|
|
@@ -2659,7 +2702,7 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
|
|
|
* Cause the #SS fault with 0 error code in VM86 mode.
|
|
|
*/
|
|
|
if (((vec == GP_VECTOR) || (vec == SS_VECTOR)) && err_code == 0)
|
|
|
- if (emulate_instruction(vcpu, NULL, 0, 0, 0) == EMULATE_DONE)
|
|
|
+ if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DONE)
|
|
|
return 1;
|
|
|
/*
|
|
|
* Forward all other exceptions that are valid in real mode.
|
|
@@ -2710,15 +2753,16 @@ static void kvm_machine_check(void)
|
|
|
#endif
|
|
|
}
|
|
|
|
|
|
-static int handle_machine_check(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
+static int handle_machine_check(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
/* already handled by vcpu_run */
|
|
|
return 1;
|
|
|
}
|
|
|
|
|
|
-static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
+static int handle_exception(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
+ struct kvm_run *kvm_run = vcpu->run;
|
|
|
u32 intr_info, ex_no, error_code;
|
|
|
unsigned long cr2, rip, dr6;
|
|
|
u32 vect_info;
|
|
@@ -2728,12 +2772,17 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
|
|
|
|
|
|
if (is_machine_check(intr_info))
|
|
|
- return handle_machine_check(vcpu, kvm_run);
|
|
|
+ return handle_machine_check(vcpu);
|
|
|
|
|
|
if ((vect_info & VECTORING_INFO_VALID_MASK) &&
|
|
|
- !is_page_fault(intr_info))
|
|
|
- printk(KERN_ERR "%s: unexpected, vectoring info 0x%x "
|
|
|
- "intr info 0x%x\n", __func__, vect_info, intr_info);
|
|
|
+ !is_page_fault(intr_info)) {
|
|
|
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
|
|
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_SIMUL_EX;
|
|
|
+ vcpu->run->internal.ndata = 2;
|
|
|
+ vcpu->run->internal.data[0] = vect_info;
|
|
|
+ vcpu->run->internal.data[1] = intr_info;
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
|
|
|
if ((intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR)
|
|
|
return 1; /* already handled by vmx_vcpu_run() */
|
|
@@ -2744,7 +2793,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
}
|
|
|
|
|
|
if (is_invalid_opcode(intr_info)) {
|
|
|
- er = emulate_instruction(vcpu, kvm_run, 0, 0, EMULTYPE_TRAP_UD);
|
|
|
+ er = emulate_instruction(vcpu, 0, 0, EMULTYPE_TRAP_UD);
|
|
|
if (er != EMULATE_DONE)
|
|
|
kvm_queue_exception(vcpu, UD_VECTOR);
|
|
|
return 1;
|
|
@@ -2803,20 +2852,19 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
-static int handle_external_interrupt(struct kvm_vcpu *vcpu,
|
|
|
- struct kvm_run *kvm_run)
|
|
|
+static int handle_external_interrupt(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
++vcpu->stat.irq_exits;
|
|
|
return 1;
|
|
|
}
|
|
|
|
|
|
-static int handle_triple_fault(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
+static int handle_triple_fault(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
- kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
|
|
|
+ vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
-static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
+static int handle_io(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
unsigned long exit_qualification;
|
|
|
int size, in, string;
|
|
@@ -2827,8 +2875,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
string = (exit_qualification & 16) != 0;
|
|
|
|
|
|
if (string) {
|
|
|
- if (emulate_instruction(vcpu,
|
|
|
- kvm_run, 0, 0, 0) == EMULATE_DO_MMIO)
|
|
|
+ if (emulate_instruction(vcpu, 0, 0, 0) == EMULATE_DO_MMIO)
|
|
|
return 0;
|
|
|
return 1;
|
|
|
}
|
|
@@ -2838,7 +2885,7 @@ static int handle_io(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
port = exit_qualification >> 16;
|
|
|
|
|
|
skip_emulated_instruction(vcpu);
|
|
|
- return kvm_emulate_pio(vcpu, kvm_run, in, size, port);
|
|
|
+ return kvm_emulate_pio(vcpu, in, size, port);
|
|
|
}
|
|
|
|
|
|
static void
|
|
@@ -2852,7 +2899,7 @@ vmx_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
|
|
|
hypercall[2] = 0xc1;
|
|
|
}
|
|
|
|
|
|
-static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
+static int handle_cr(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
unsigned long exit_qualification, val;
|
|
|
int cr;
|
|
@@ -2887,7 +2934,7 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
return 1;
|
|
|
if (cr8_prev <= cr8)
|
|
|
return 1;
|
|
|
- kvm_run->exit_reason = KVM_EXIT_SET_TPR;
|
|
|
+ vcpu->run->exit_reason = KVM_EXIT_SET_TPR;
|
|
|
return 0;
|
|
|
}
|
|
|
};
|
|
@@ -2922,13 +2969,13 @@ static int handle_cr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
default:
|
|
|
break;
|
|
|
}
|
|
|
- kvm_run->exit_reason = 0;
|
|
|
+ vcpu->run->exit_reason = 0;
|
|
|
pr_unimpl(vcpu, "unhandled control register: op %d cr %d\n",
|
|
|
(int)(exit_qualification >> 4) & 3, cr);
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
-static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
+static int handle_dr(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
unsigned long exit_qualification;
|
|
|
unsigned long val;
|
|
@@ -2944,13 +2991,13 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
* guest debugging itself.
|
|
|
*/
|
|
|
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
|
|
|
- kvm_run->debug.arch.dr6 = vcpu->arch.dr6;
|
|
|
- kvm_run->debug.arch.dr7 = dr;
|
|
|
- kvm_run->debug.arch.pc =
|
|
|
+ vcpu->run->debug.arch.dr6 = vcpu->arch.dr6;
|
|
|
+ vcpu->run->debug.arch.dr7 = dr;
|
|
|
+ vcpu->run->debug.arch.pc =
|
|
|
vmcs_readl(GUEST_CS_BASE) +
|
|
|
vmcs_readl(GUEST_RIP);
|
|
|
- kvm_run->debug.arch.exception = DB_VECTOR;
|
|
|
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
|
|
|
+ vcpu->run->debug.arch.exception = DB_VECTOR;
|
|
|
+ vcpu->run->exit_reason = KVM_EXIT_DEBUG;
|
|
|
return 0;
|
|
|
} else {
|
|
|
vcpu->arch.dr7 &= ~DR7_GD;
|
|
@@ -3016,13 +3063,13 @@ static int handle_dr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
return 1;
|
|
|
}
|
|
|
|
|
|
-static int handle_cpuid(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
+static int handle_cpuid(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
kvm_emulate_cpuid(vcpu);
|
|
|
return 1;
|
|
|
}
|
|
|
|
|
|
-static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
+static int handle_rdmsr(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
|
|
|
u64 data;
|
|
@@ -3041,7 +3088,7 @@ static int handle_rdmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
return 1;
|
|
|
}
|
|
|
|
|
|
-static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
+static int handle_wrmsr(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
u32 ecx = vcpu->arch.regs[VCPU_REGS_RCX];
|
|
|
u64 data = (vcpu->arch.regs[VCPU_REGS_RAX] & -1u)
|
|
@@ -3058,14 +3105,12 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
return 1;
|
|
|
}
|
|
|
|
|
|
-static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu,
|
|
|
- struct kvm_run *kvm_run)
|
|
|
+static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
return 1;
|
|
|
}
|
|
|
|
|
|
-static int handle_interrupt_window(struct kvm_vcpu *vcpu,
|
|
|
- struct kvm_run *kvm_run)
|
|
|
+static int handle_interrupt_window(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
u32 cpu_based_vm_exec_control;
|
|
|
|
|
@@ -3081,34 +3126,34 @@ static int handle_interrupt_window(struct kvm_vcpu *vcpu,
|
|
|
* possible
|
|
|
*/
|
|
|
if (!irqchip_in_kernel(vcpu->kvm) &&
|
|
|
- kvm_run->request_interrupt_window &&
|
|
|
+ vcpu->run->request_interrupt_window &&
|
|
|
!kvm_cpu_has_interrupt(vcpu)) {
|
|
|
- kvm_run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
|
|
|
+ vcpu->run->exit_reason = KVM_EXIT_IRQ_WINDOW_OPEN;
|
|
|
return 0;
|
|
|
}
|
|
|
return 1;
|
|
|
}
|
|
|
|
|
|
-static int handle_halt(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
+static int handle_halt(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
skip_emulated_instruction(vcpu);
|
|
|
return kvm_emulate_halt(vcpu);
|
|
|
}
|
|
|
|
|
|
-static int handle_vmcall(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
+static int handle_vmcall(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
skip_emulated_instruction(vcpu);
|
|
|
kvm_emulate_hypercall(vcpu);
|
|
|
return 1;
|
|
|
}
|
|
|
|
|
|
-static int handle_vmx_insn(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
+static int handle_vmx_insn(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
kvm_queue_exception(vcpu, UD_VECTOR);
|
|
|
return 1;
|
|
|
}
|
|
|
|
|
|
-static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
+static int handle_invlpg(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
|
|
|
|
|
@@ -3117,14 +3162,14 @@ static int handle_invlpg(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
return 1;
|
|
|
}
|
|
|
|
|
|
-static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
+static int handle_wbinvd(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
skip_emulated_instruction(vcpu);
|
|
|
/* TODO: Add support for VT-d/pass-through device */
|
|
|
return 1;
|
|
|
}
|
|
|
|
|
|
-static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
+static int handle_apic_access(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
unsigned long exit_qualification;
|
|
|
enum emulation_result er;
|
|
@@ -3133,7 +3178,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
|
|
|
offset = exit_qualification & 0xffful;
|
|
|
|
|
|
- er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
|
|
|
+ er = emulate_instruction(vcpu, 0, 0, 0);
|
|
|
|
|
|
if (er != EMULATE_DONE) {
|
|
|
printk(KERN_ERR
|
|
@@ -3144,7 +3189,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
return 1;
|
|
|
}
|
|
|
|
|
|
-static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
+static int handle_task_switch(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
unsigned long exit_qualification;
|
|
@@ -3198,7 +3243,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
return 1;
|
|
|
}
|
|
|
|
|
|
-static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
+static int handle_ept_violation(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
unsigned long exit_qualification;
|
|
|
gpa_t gpa;
|
|
@@ -3219,8 +3264,8 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
vmcs_readl(GUEST_LINEAR_ADDRESS));
|
|
|
printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
|
|
|
(long unsigned int)exit_qualification);
|
|
|
- kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
|
|
|
- kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION;
|
|
|
+ vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
|
|
|
+ vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_VIOLATION;
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
@@ -3290,7 +3335,7 @@ static void ept_misconfig_inspect_spte(struct kvm_vcpu *vcpu, u64 spte,
|
|
|
}
|
|
|
}
|
|
|
|
|
|
-static int handle_ept_misconfig(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
+static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
u64 sptes[4];
|
|
|
int nr_sptes, i;
|
|
@@ -3306,13 +3351,13 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
for (i = PT64_ROOT_LEVEL; i > PT64_ROOT_LEVEL - nr_sptes; --i)
|
|
|
ept_misconfig_inspect_spte(vcpu, sptes[i-1], i);
|
|
|
|
|
|
- kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
|
|
|
- kvm_run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG;
|
|
|
+ vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
|
|
|
+ vcpu->run->hw.hardware_exit_reason = EXIT_REASON_EPT_MISCONFIG;
|
|
|
|
|
|
return 0;
|
|
|
}
|
|
|
|
|
|
-static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
+static int handle_nmi_window(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
u32 cpu_based_vm_exec_control;
|
|
|
|
|
@@ -3325,36 +3370,50 @@ static int handle_nmi_window(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
return 1;
|
|
|
}
|
|
|
|
|
|
-static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
|
|
|
- struct kvm_run *kvm_run)
|
|
|
+static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
enum emulation_result err = EMULATE_DONE;
|
|
|
-
|
|
|
- local_irq_enable();
|
|
|
- preempt_enable();
|
|
|
+ int ret = 1;
|
|
|
|
|
|
while (!guest_state_valid(vcpu)) {
|
|
|
- err = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
|
|
|
+ err = emulate_instruction(vcpu, 0, 0, 0);
|
|
|
|
|
|
- if (err == EMULATE_DO_MMIO)
|
|
|
- break;
|
|
|
+ if (err == EMULATE_DO_MMIO) {
|
|
|
+ ret = 0;
|
|
|
+ goto out;
|
|
|
+ }
|
|
|
|
|
|
if (err != EMULATE_DONE) {
|
|
|
kvm_report_emulation_failure(vcpu, "emulation failure");
|
|
|
- break;
|
|
|
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
|
|
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
|
|
|
+ vcpu->run->internal.ndata = 0;
|
|
|
+ ret = 0;
|
|
|
+ goto out;
|
|
|
}
|
|
|
|
|
|
if (signal_pending(current))
|
|
|
- break;
|
|
|
+ goto out;
|
|
|
if (need_resched())
|
|
|
schedule();
|
|
|
}
|
|
|
|
|
|
- preempt_disable();
|
|
|
- local_irq_disable();
|
|
|
+ vmx->emulation_required = 0;
|
|
|
+out:
|
|
|
+ return ret;
|
|
|
+}
|
|
|
|
|
|
- vmx->invalid_state_emulation_result = err;
|
|
|
+/*
|
|
|
+ * The guest is busy-waiting in a spinlock. We do not enable plain PAUSE
|
|
|
+ * exiting, so we only get here on CPUs with Pause-Loop Exiting support.
|
|
|
+ */
|
|
|
+static int handle_pause(struct kvm_vcpu *vcpu)
|
|
|
+{
|
|
|
+ skip_emulated_instruction(vcpu);
|
|
|
+ kvm_vcpu_on_spin(vcpu);
|
|
|
+
|
|
|
+ return 1;
|
|
|
}
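handle_pause() turns a detected PAUSE loop into a scheduling hint via kvm_vcpu_on_spin(). A loose user-space analogy with invented thresholds, not the KVM implementation: a waiter that cannot take the lock quickly yields the CPU instead of burning its whole timeslice.

#include <sched.h>
#include <stdatomic.h>

#define SPIN_LIMIT 4096         /* invented: give up after this many busy iterations */

static void spin_lock_friendly(atomic_flag *lock)
{
        int spins = 0;

        while (atomic_flag_test_and_set_explicit(lock, memory_order_acquire)) {
                if (++spins >= SPIN_LIMIT) {
                        sched_yield();  /* roughly what kvm_vcpu_on_spin() does for a vcpu */
                        spins = 0;
                }
        }
}

static void spin_unlock_friendly(atomic_flag *lock)
{
        atomic_flag_clear_explicit(lock, memory_order_release);
}

int main(void)
{
        static atomic_flag lock = ATOMIC_FLAG_INIT;

        spin_lock_friendly(&lock);      /* uncontended: acquires immediately */
        spin_unlock_friendly(&lock);
        return 0;
}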
|
|
|
|
|
|
/*
|
|
@@ -3362,8 +3421,7 @@ static void handle_invalid_guest_state(struct kvm_vcpu *vcpu,
|
|
|
* may resume. Otherwise they set the kvm_run parameter to indicate what needs
|
|
|
* to be done to userspace and return 0.
|
|
|
*/
|
|
|
-static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
|
|
|
- struct kvm_run *kvm_run) = {
|
|
|
+static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
|
|
|
[EXIT_REASON_EXCEPTION_NMI] = handle_exception,
|
|
|
[EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
|
|
|
[EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault,
|
|
@@ -3394,6 +3452,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
|
|
|
[EXIT_REASON_MCE_DURING_VMENTRY] = handle_machine_check,
|
|
|
[EXIT_REASON_EPT_VIOLATION] = handle_ept_violation,
|
|
|
[EXIT_REASON_EPT_MISCONFIG] = handle_ept_misconfig,
|
|
|
+ [EXIT_REASON_PAUSE_INSTRUCTION] = handle_pause,
|
|
|
};
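The table above dispatches on the VM-exit reason; handlers now take only the vcpu, reaching the run structure through it. A stand-alone sketch of the convention described in the comment before the table (return 1 to resume the guest, 0 to exit to userspace); every name in it is illustrative.

#include <stdio.h>

struct run  { int exit_reason; };
struct vcpu { struct run *run; };

enum { EXIT_HLT, EXIT_IO, EXIT_MAX };

static int handle_hlt(struct vcpu *v)           /* fully handled in the kernel */
{
        (void)v;
        return 1;
}

static int handle_io(struct vcpu *v)            /* needs userspace assistance */
{
        v->run->exit_reason = EXIT_IO;
        return 0;
}

static int (*handlers[])(struct vcpu *) = {
        [EXIT_HLT] = handle_hlt,
        [EXIT_IO]  = handle_io,
};

static int handle_exit(struct vcpu *v, int reason)
{
        if (reason < EXIT_MAX && handlers[reason])
                return handlers[reason](v);
        v->run->exit_reason = -1;               /* unknown exit: punt to userspace */
        return 0;
}

int main(void)
{
        struct run r = { 0 };
        struct vcpu v = { &r };

        printf("hlt -> %d, io -> %d\n",
               handle_exit(&v, EXIT_HLT), handle_exit(&v, EXIT_IO));
        return 0;
}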
|
|
|
|
|
|
static const int kvm_vmx_max_exit_handlers =
|
|
@@ -3403,7 +3462,7 @@ static const int kvm_vmx_max_exit_handlers =
|
|
|
* The guest has exited. See if we can fix it or if we need userspace
|
|
|
* assistance.
|
|
|
*/
|
|
|
-static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
|
|
+static int vmx_handle_exit(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
u32 exit_reason = vmx->exit_reason;
|
|
@@ -3411,13 +3470,9 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
|
|
|
|
|
trace_kvm_exit(exit_reason, kvm_rip_read(vcpu));
|
|
|
|
|
|
- /* If we need to emulate an MMIO from handle_invalid_guest_state
|
|
|
- * we just return 0 */
|
|
|
- if (vmx->emulation_required && emulate_invalid_guest_state) {
|
|
|
- if (guest_state_valid(vcpu))
|
|
|
- vmx->emulation_required = 0;
|
|
|
- return vmx->invalid_state_emulation_result != EMULATE_DO_MMIO;
|
|
|
- }
|
|
|
+ /* If guest state is invalid, start emulating */
|
|
|
+ if (vmx->emulation_required && emulate_invalid_guest_state)
|
|
|
+ return handle_invalid_guest_state(vcpu);
|
|
|
|
|
|
/* Access CR3 don't cause VMExit in paging mode, so we need
|
|
|
* to sync with guest real CR3. */
|
|
@@ -3425,8 +3480,8 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
|
|
vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
|
|
|
|
|
|
if (unlikely(vmx->fail)) {
|
|
|
- kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
|
|
|
- kvm_run->fail_entry.hardware_entry_failure_reason
|
|
|
+ vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
|
|
|
+ vcpu->run->fail_entry.hardware_entry_failure_reason
|
|
|
= vmcs_read32(VM_INSTRUCTION_ERROR);
|
|
|
return 0;
|
|
|
}
|
|
@@ -3459,10 +3514,10 @@ static int vmx_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
|
|
|
|
|
|
if (exit_reason < kvm_vmx_max_exit_handlers
|
|
|
&& kvm_vmx_exit_handlers[exit_reason])
|
|
|
- return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);
|
|
|
+ return kvm_vmx_exit_handlers[exit_reason](vcpu);
|
|
|
else {
|
|
|
- kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
|
|
|
- kvm_run->hw.hardware_exit_reason = exit_reason;
|
|
|
+ vcpu->run->exit_reason = KVM_EXIT_UNKNOWN;
|
|
|
+ vcpu->run->hw.hardware_exit_reason = exit_reason;
|
|
|
}
|
|
|
return 0;
|
|
|
}
|
|
@@ -3600,23 +3655,18 @@ static void fixup_rmode_irq(struct vcpu_vmx *vmx)
|
|
|
#define Q "l"
|
|
|
#endif
|
|
|
|
|
|
-static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
|
|
|
+static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
|
|
|
{
|
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
|
|
|
- if (enable_ept && is_paging(vcpu)) {
|
|
|
- vmcs_writel(GUEST_CR3, vcpu->arch.cr3);
|
|
|
- ept_load_pdptrs(vcpu);
|
|
|
- }
|
|
|
/* Record the guest's net vcpu time for enforced NMI injections. */
|
|
|
if (unlikely(!cpu_has_virtual_nmis() && vmx->soft_vnmi_blocked))
|
|
|
vmx->entry_time = ktime_get();
|
|
|
|
|
|
- /* Handle invalid guest state instead of entering VMX */
|
|
|
- if (vmx->emulation_required && emulate_invalid_guest_state) {
|
|
|
- handle_invalid_guest_state(vcpu, kvm_run);
|
|
|
+	/* Don't enter VMX if the guest state is invalid; let the exit handler
|
|
|
+	   keep emulating until we arrive back at a valid state */
|
|
|
+ if (vmx->emulation_required && emulate_invalid_guest_state)
|
|
|
return;
|
|
|
- }
|
|
|
|
|
|
if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
|
|
|
vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
|
|
@@ -3775,7 +3825,6 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
|
|
|
__clear_bit(vmx->vpid, vmx_vpid_bitmap);
|
|
|
spin_unlock(&vmx_vpid_lock);
|
|
|
vmx_free_vmcs(vcpu);
|
|
|
- kfree(vmx->host_msrs);
|
|
|
kfree(vmx->guest_msrs);
|
|
|
kvm_vcpu_uninit(vcpu);
|
|
|
kmem_cache_free(kvm_vcpu_cache, vmx);
|
|
@@ -3802,10 +3851,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
|
|
|
goto uninit_vcpu;
|
|
|
}
|
|
|
|
|
|
- vmx->host_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
|
|
|
- if (!vmx->host_msrs)
|
|
|
- goto free_guest_msrs;
|
|
|
-
|
|
|
vmx->vmcs = alloc_vmcs();
|
|
|
if (!vmx->vmcs)
|
|
|
goto free_msrs;
|
|
@@ -3836,8 +3881,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
|
|
|
free_vmcs:
|
|
|
free_vmcs(vmx->vmcs);
|
|
|
free_msrs:
|
|
|
- kfree(vmx->host_msrs);
|
|
|
-free_guest_msrs:
|
|
|
kfree(vmx->guest_msrs);
|
|
|
uninit_vcpu:
|
|
|
kvm_vcpu_uninit(&vmx->vcpu);
|
|
@@ -3973,6 +4016,8 @@ static struct kvm_x86_ops vmx_x86_ops = {
|
|
|
.queue_exception = vmx_queue_exception,
|
|
|
.interrupt_allowed = vmx_interrupt_allowed,
|
|
|
.nmi_allowed = vmx_nmi_allowed,
|
|
|
+ .get_nmi_mask = vmx_get_nmi_mask,
|
|
|
+ .set_nmi_mask = vmx_set_nmi_mask,
|
|
|
.enable_nmi_window = enable_nmi_window,
|
|
|
.enable_irq_window = enable_irq_window,
|
|
|
.update_cr8_intercept = update_cr8_intercept,
|
|
@@ -3987,7 +4032,12 @@ static struct kvm_x86_ops vmx_x86_ops = {
|
|
|
|
|
|
static int __init vmx_init(void)
|
|
|
{
|
|
|
- int r;
|
|
|
+ int r, i;
|
|
|
+
|
|
|
+ rdmsrl_safe(MSR_EFER, &host_efer);
|
|
|
+
|
|
|
+ for (i = 0; i < NR_VMX_MSR; ++i)
|
|
|
+ kvm_define_shared_msr(i, vmx_msr_index[i]);
|
|
|
|
|
|
vmx_io_bitmap_a = (unsigned long *)__get_free_page(GFP_KERNEL);
|
|
|
if (!vmx_io_bitmap_a)
|
|
@@ -4049,8 +4099,6 @@ static int __init vmx_init(void)
|
|
|
if (bypass_guest_pf)
|
|
|
kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);
|
|
|
|
|
|
- ept_sync_global();
|
|
|
-
|
|
|
return 0;
|
|
|
|
|
|
out3:
|